
Merge branch 'topic/ppgtt' into drm-intel-next-queued

Because whatever.*

* This should contain a fairly long list of issues and still
unresolved regressions, but I didn't really get a vote.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Daniel Vetter 11 years ago
parent
commit
0e5539b923

+ 35 - 5
drivers/gpu/drm/i915/i915_debugfs.c

@@ -98,7 +98,7 @@ static const char *get_pin_flag(struct drm_i915_gem_object *obj)
 {
 	if (obj->user_pin_count > 0)
 		return "P";
-	else if (obj->pin_count > 0)
+	else if (i915_gem_obj_is_pinned(obj))
 		return "p";
 	else
 		return " ";
@@ -123,6 +123,8 @@ static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
+	int pin_count = 0;
+
 	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
@@ -139,8 +141,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
-	if (obj->pin_count)
-		seq_printf(m, " (pinned x %d)", obj->pin_count);
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		if (vma->pin_count > 0)
+			pin_count++;
+	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_display)
 		seq_printf(m, " (display)");
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
@@ -447,7 +451,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 
 	total_obj_size = total_gtt_size = count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (list == PINNED_LIST && obj->pin_count == 0)
+		if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
 			continue;
 
 		seq_puts(m, "   ");
@@ -1731,6 +1735,17 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int per_file_ctx(int id, void *ptr, void *data)
+{
+	struct i915_hw_context *ctx = ptr;
+	struct seq_file *m = data;
+	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
+
+	ppgtt->debug_dump(ppgtt, m);
+
+	return 0;
+}
+
 static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1760,6 +1775,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring;
+	struct drm_file *file;
 	int i;
 
 	if (INTEL_INFO(dev)->gen == 6)
@@ -1778,6 +1794,20 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 
 		seq_puts(m, "aliasing PPGTT:\n");
 		seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
+
+		ppgtt->debug_dump(ppgtt, m);
+	} else
+		return;
+
+	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv = file->driver_priv;
+		struct i915_hw_ppgtt *pvt_ppgtt;
+
+		pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx);
+		seq_printf(m, "proc: %s\n",
+			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
+		seq_puts(m, "  default context:\n");
+		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
 	}
 	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
@@ -2927,7 +2957,7 @@ i915_drop_caches_set(void *data, u64 val)
 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
 			list_for_each_entry_safe(vma, x, &vm->inactive_list,
 						 mm_list) {
-				if (vma->obj->pin_count)
+				if (vma->pin_count)
 					continue;
 
 				ret = i915_vma_unbind(vma);
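The debugfs changes above follow the merge's central theme: pin accounting moves from the object to each VMA, so "is this object pinned?" becomes a walk over obj->vma_list (the i915_gem_obj_is_pinned() helper added in i915_drv.h further down). A minimal userspace model of that accounting — the struct names and list linkage here are hypothetical stand-ins for the kernel's types:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for struct i915_vma / drm_i915_gem_object. */
struct vma {
	unsigned int pin_count;		/* per-VMA after this merge */
	struct vma *next;		/* models the obj->vma_list linkage */
};

struct gem_object {
	struct vma *vma_list;
};

/* Same idea as i915_gem_obj_is_pinned(): pinned iff any VMA holds a pin. */
static bool obj_is_pinned(const struct gem_object *obj)
{
	for (const struct vma *v = obj->vma_list; v; v = v->next)
		if (v->pin_count > 0)
			return true;
	return false;
}

int main(void)
{
	struct vma ppgtt_vma = { .pin_count = 0, .next = NULL };
	struct vma ggtt_vma  = { .pin_count = 2, .next = &ppgtt_vma };
	struct gem_object obj = { .vma_list = &ggtt_vma };

	/* describe_obj() in debugfs sums the per-VMA counts the same way. */
	printf("pinned: %s\n", obj_is_pinned(&obj) ? "yes" : "no");
	return 0;
}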

+ 3 - 3
drivers/gpu/drm/i915/i915_dma.c

@@ -990,7 +990,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = HAS_WT(dev);
 		break;
 	case I915_PARAM_HAS_ALIASING_PPGTT:
-		value = dev_priv->mm.aliasing_ppgtt ? 1 : 0;
+		value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev);
 		break;
 	case I915_PARAM_HAS_WAIT_TIMEOUT:
 		value = 1;
@@ -1374,7 +1374,7 @@ cleanup_gem:
 	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
-	i915_gem_cleanup_aliasing_ppgtt(dev);
+	WARN_ON(dev_priv->mm.aliasing_ppgtt);
 	drm_mm_takedown(&dev_priv->gtt.base.mm);
 cleanup_power:
 	intel_display_power_put(dev, POWER_DOMAIN_VGA);
@@ -1775,8 +1775,8 @@ int i915_driver_unload(struct drm_device *dev)
 		i915_gem_free_all_phys_object(dev);
 		i915_gem_cleanup_ringbuffer(dev);
 		i915_gem_context_fini(dev);
+		WARN_ON(dev_priv->mm.aliasing_ppgtt);
 		mutex_unlock(&dev->struct_mutex);
-		i915_gem_cleanup_aliasing_ppgtt(dev);
 		i915_gem_cleanup_stolen(dev);
 
 		if (!I915_NEED_GFX_HWS(dev))
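The getparam tweak above means I915_PARAM_HAS_ALIASING_PPGTT now also reports 1 when full PPGTT is enabled. A sketch of how userspace reads it through the standard getparam ioctl — the device node path is an assumption and error handling is trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* assumed device node */
	int has_ppgtt = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_ALIASING_PPGTT,
		.value = &has_ppgtt,
	};

	/* After this merge: 1 when aliasing *or* full PPGTT is in use. */
	if (fd >= 0 && ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("PPGTT enabled: %d\n", has_ppgtt);
	return 0;
}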

+ 2 - 1
drivers/gpu/drm/i915/i915_drv.c

@@ -113,7 +113,8 @@ MODULE_PARM_DESC(enable_hangcheck,
 int i915_enable_ppgtt __read_mostly = -1;
 module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(i915_enable_ppgtt,
-		"Enable PPGTT (default: true)");
+		"Override PPGTT usage. "
+		"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
 
 int i915_enable_psr __read_mostly = 0;
 module_param_named(enable_psr, i915_enable_psr, int, 0600);
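i915_enable_ppgtt is no longer a boolean: -1 picks a platform default, 0 disables PPGTT, 1 forces aliasing mode only, 2 allows full PPGTT. The real decision logic is intel_enable_ppgtt() in i915_drv.h below; this hypothetical standalone restatement passes the platform capability checks in as plain parameters to show how the override resolves:

#include <stdbool.h>

/*
 * Sketch of the intel_enable_ppgtt() decision tree added in i915_drv.h;
 * the function name and boolean parameters here are assumptions.
 */
bool enable_ppgtt(int param, bool has_aliasing, bool has_full,
		  bool gen6_with_iommu, bool want_full)
{
	if (param == 0 || !has_aliasing)
		return false;		/* explicitly disabled or unsupported */
	if (param == 1 && want_full)
		return false;		/* user limited us to aliasing mode */
	if (gen6_with_iommu)
		return false;		/* VT-d on SNB disables PPGTT */
	return want_full ? has_full : has_aliasing;
}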

+ 153 - 73
drivers/gpu/drm/i915/i915_drv.h

@@ -523,6 +523,57 @@ enum i915_cache_level {
 
 typedef uint32_t gen6_gtt_pte_t;
 
+/**
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (i.e. no refcounting), a VMA's lifetime
+ * will always be <= an object's lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head mm_list;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
+
+	/** This vma's place in the batchbuffer or on the eviction list */
+	struct list_head exec_list;
+
+	/**
+	 * Used for performing relocations during execbuffer insertion.
+	 */
+	struct hlist_node exec_node;
+	unsigned long exec_handle;
+	struct drm_i915_gem_exec_object2 *exec_entry;
+
+	/**
+	 * How many users have pinned this object in GTT space. The following
+	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
+	 * (via user_pin_count), execbuffer (objects are not allowed multiple
+	 * times for the same batchbuffer), and the framebuffer code. When
+	 * switching/pageflipping, the framebuffer code has at most two buffers
+	 * pinned per crtc.
+	 *
+	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
+	 * bits with absolutely no headroom. So use 4 bits. */
+	unsigned int pin_count:4;
+#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
+
+	/** Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page. */
+	void (*unbind_vma)(struct i915_vma *vma);
+	/* Map an object into an address space with the given cache flags. */
+#define GLOBAL_BIND (1<<0)
+	void (*bind_vma)(struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags);
+};
+
 struct i915_address_space {
 	struct drm_mm mm;
 	struct drm_device *dev;
@@ -604,6 +655,8 @@ struct i915_gtt {
 
 struct i915_hw_ppgtt {
 	struct i915_address_space base;
+	struct kref ref;
+	struct drm_mm_node node;
 	unsigned num_pd_entries;
 	union {
 		struct page **pt_pages;
@@ -620,37 +673,12 @@ struct i915_hw_ppgtt {
 		dma_addr_t *pt_dma_addr;
 		dma_addr_t *gen8_pt_dma_addr[4];
 	};
-	int (*enable)(struct drm_device *dev);
-};
-
-/**
- * A VMA represents a GEM BO that is bound into an address space. Therefore, a
- * VMA's presence cannot be guaranteed before binding, or after unbinding the
- * object into/from the address space.
- *
- * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
- * will always be <= an objects lifetime. So object refcounting should cover us.
- */
-struct i915_vma {
-	struct drm_mm_node node;
-	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm;
-
-	/** This object's place on the active/inactive lists */
-	struct list_head mm_list;
-
-	struct list_head vma_link; /* Link in the object's VMA list */
-
-	/** This vma's place in the batchbuffer or on the eviction list */
-	struct list_head exec_list;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	struct hlist_node exec_node;
-	unsigned long exec_handle;
-	struct drm_i915_gem_exec_object2 *exec_entry;
 
+	int (*enable)(struct i915_hw_ppgtt *ppgtt);
+	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
+			 struct intel_ring_buffer *ring,
+			 bool synchronous);
+	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
 
 struct i915_ctx_hang_stats {
@@ -675,9 +703,10 @@ struct i915_hw_context {
 	bool is_initialized;
 	uint8_t remap_slice;
 	struct drm_i915_file_private *file_priv;
-	struct intel_ring_buffer *ring;
+	struct intel_ring_buffer *last_ring;
 	struct drm_i915_gem_object *obj;
 	struct i915_ctx_hang_stats hang_stats;
+	struct i915_address_space *vm;
 
 	struct list_head link;
 };
@@ -1622,18 +1651,6 @@ struct drm_i915_gem_object {
 	 */
 	unsigned int fence_dirty:1;
 
-	/** How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
-	 * (via user_pin_count), execbuffer (objects are not allowed multiple
-	 * times for the same batchbuffer), and the framebuffer code. When
-	 * switching/pageflipping, the framebuffer code has at most two buffers
-	 * pinned per crtc.
-	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count:4;
-#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
-
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
@@ -1746,7 +1763,7 @@ struct drm_i915_file_private {
 	} mm;
 	struct idr context_idr;
 
-	struct i915_ctx_hang_stats hang_stats;
+	struct i915_hw_context *private_default_ctx;
 	atomic_t rps_wait_boost;
 };
 
@@ -1819,7 +1836,10 @@ struct drm_i915_file_private {
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
-#define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >=6 && !IS_VALLEYVIEW(dev))
+#define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
+#define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev))
+#define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false)
+#define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
@@ -2000,6 +2020,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm);
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
@@ -2008,7 +2030,7 @@ int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
 				     bool nonblocking);
-void i915_gem_object_unpin(struct drm_i915_gem_object *obj);
+void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
@@ -2172,6 +2194,13 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm);
 
 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj);
+static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		if (vma->pin_count > 0)
+			return true;
+	return false;
+}
 
 /* Some GGTT VM helpers */
 #define obj_to_ggtt(obj) \
@@ -2211,46 +2240,51 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 }
 
 /* i915_gem_context.c */
+#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
+void i915_gem_context_reset(struct drm_device *dev);
+int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
+int i915_gem_context_enable(struct drm_i915_private *dev_priv);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct intel_ring_buffer *ring,
-			struct drm_file *file, int to_id);
+			struct drm_file *file, struct i915_hw_context *to);
+struct i915_hw_context *
+i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
 void i915_gem_context_free(struct kref *ctx_ref);
 static inline void i915_gem_context_reference(struct i915_hw_context *ctx)
 {
-	kref_get(&ctx->ref);
+	if (ctx->obj && HAS_HW_CONTEXTS(ctx->obj->base.dev))
+		kref_get(&ctx->ref);
 }
 
 static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
 {
-	kref_put(&ctx->ref, i915_gem_context_free);
+	if (ctx->obj && HAS_HW_CONTEXTS(ctx->obj->base.dev))
+		kref_put(&ctx->ref, i915_gem_context_free);
 }
 
-struct i915_ctx_hang_stats * __must_check
-i915_gem_context_get_hang_stats(struct drm_device *dev,
-				struct drm_file *file,
-				u32 id);
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
-/* i915_gem_gtt.c */
-void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
-void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level);
-void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
-			      struct drm_i915_gem_object *obj);
+/* i915_gem_evict.c */
+int __must_check i915_gem_evict_something(struct drm_device *dev,
+					  struct i915_address_space *vm,
+					  int min_size,
+					  unsigned alignment,
+					  unsigned cache_level,
+					  bool mappable,
+					  bool nonblock);
+int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+int i915_gem_evict_everything(struct drm_device *dev);
 
+/* i915_gem_gtt.c */
 void i915_check_and_clear_faults(struct drm_device *dev);
 void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-				enum i915_cache_level cache_level);
-void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
@@ -2261,18 +2295,64 @@ static inline void i915_gem_chipset_flush(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen < 6)
 		intel_gtt_chipset_flush();
 }
+int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
+static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
+{
+	if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
+		return false;
 
+	if (i915_enable_ppgtt == 1 && full)
+		return false;
 
-/* i915_gem_evict.c */
-int __must_check i915_gem_evict_something(struct drm_device *dev,
-					  struct i915_address_space *vm,
-					  int min_size,
-					  unsigned alignment,
-					  unsigned cache_level,
-					  bool mappable,
-					  bool nonblock);
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
-int i915_gem_evict_everything(struct drm_device *dev);
+#ifdef CONFIG_INTEL_IOMMU
+	/* Disable ppgtt on SNB if VT-d is on. */
+	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
+		DRM_INFO("Disabling PPGTT because VT-d is on\n");
+		return false;
+	}
+#endif
+
+	if (full)
+		return HAS_PPGTT(dev);
+	else
+		return HAS_ALIASING_PPGTT(dev);
+}
+
+static inline void ppgtt_release(struct kref *kref)
+{
+	struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref);
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &ppgtt->base;
+
+	if (ppgtt == dev_priv->mm.aliasing_ppgtt ||
+	    (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) {
+		ppgtt->base.cleanup(&ppgtt->base);
+		return;
+	}
+
+	/*
+	 * Make sure vmas are unbound before we take down the drm_mm
+	 *
+	 * FIXME: Proper refcounting should take care of this, this shouldn't be
+	 * needed at all.
+	 */
+	if (!list_empty(&vm->active_list)) {
+		struct i915_vma *vma;
+
+		list_for_each_entry(vma, &vm->active_list, mm_list)
+			if (WARN_ON(list_empty(&vma->vma_link) ||
+				    list_is_singular(&vma->vma_link)))
+				break;
+
+		i915_gem_evict_vm(&ppgtt->base, true);
+	} else {
+		i915_gem_retire_requests(dev);
+		i915_gem_evict_vm(&ppgtt->base, false);
+	}
+
+	ppgtt->base.cleanup(&ppgtt->base);
+}
 
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
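struct i915_hw_ppgtt now carries a kref so several contexts can share one address space: each context holding a PPGTT takes a reference (see the kref_get() on the aliasing PPGTT in i915_gem_context.c below) and drops it via kref_put(&ppgtt->ref, ppgtt_release) when the context is freed. A userspace model of that ownership pattern — the atomic counter stands in for the kernel's kref, and the ppgtt_create()/get()/put() names are hypothetical:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ppgtt {
	atomic_int ref;
};

static struct ppgtt *ppgtt_create(void)
{
	struct ppgtt *p = malloc(sizeof(*p));
	atomic_init(&p->ref, 1);	/* creating context owns one ref */
	return p;
}

static void ppgtt_get(struct ppgtt *p)
{
	atomic_fetch_add(&p->ref, 1);	/* another context shares the VM */
}

static void ppgtt_put(struct ppgtt *p)	/* models kref_put(..., ppgtt_release) */
{
	if (atomic_fetch_sub(&p->ref, 1) == 1) {
		/* last holder gone: tear down page tables, free the VM */
		puts("ppgtt released");
		free(p);
	}
}

int main(void)
{
	struct ppgtt *shared = ppgtt_create();	/* e.g. the aliasing PPGTT */
	ppgtt_get(shared);	/* second context takes a reference */
	ppgtt_put(shared);	/* first context freed */
	ppgtt_put(shared);	/* second context freed -> release */
	return 0;
}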

+ 105 - 131
drivers/gpu/drm/i915/i915_gem.c

@@ -204,7 +204,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (obj->pin_count)
+		if (i915_gem_obj_is_pinned(obj))
 			pinned += i915_gem_obj_ggtt_size(obj);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -651,7 +651,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	}
 
 out_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 out:
 	return ret;
 }
@@ -1420,7 +1420,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
 unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
 out:
@@ -2035,13 +2035,17 @@ static void
 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
-	struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
+	struct i915_address_space *vm;
+	struct i915_vma *vma;
 
 	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
 	BUG_ON(!obj->active);
 
-	list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list);
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		vma = i915_gem_obj_to_vma(obj, vm);
+		if (vma && !list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vm->inactive_list);
+	}
 
 	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
@@ -2270,7 +2274,10 @@ request_to_vm(struct drm_i915_gem_request *request)
 	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
 	struct i915_address_space *vm;
 
-	vm = &dev_priv->gtt.base;
+	if (request->ctx)
+		vm = request->ctx->vm;
+	else
+		vm = &dev_priv->gtt.base;
 
 	return vm;
 }
@@ -2346,7 +2353,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
 		hs = &request->ctx->hang_stats;
 	else if (request->file_priv)
-		hs = &request->file_priv->hang_stats;
+		hs = &request->file_priv->private_default_ctx->hang_stats;
 
 	if (hs) {
 		if (guilty) {
@@ -2456,6 +2463,8 @@ void i915_gem_reset(struct drm_device *dev)
 
 	i915_gem_cleanup_ringbuffer(dev);
 
+	i915_gem_context_reset(dev);
+
 	i915_gem_restore_fences(dev);
 }
 
@@ -2474,6 +2483,24 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 
 	seqno = ring->get_seqno(ring, true);
 
+	/* Move any buffers on the active list that are no longer referenced
+	 * by the ringbuffer to the flushing/inactive lists as appropriate,
+	 * before we free the context associated with the requests.
+	 */
+	while (!list_empty(&ring->active_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->active_list,
+				      struct drm_i915_gem_object,
+				      ring_list);
+
+		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+			break;
+
+		i915_gem_object_move_to_inactive(obj);
+	}
+
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
 
@@ -2495,22 +2522,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 		i915_gem_free_request(request);
 	}
 
-	/* Move any buffers on the active list that are no longer referenced
-	 * by the ringbuffer to the flushing/inactive lists as appropriate.
-	 */
-	while (!list_empty(&ring->active_list)) {
-		struct drm_i915_gem_object *obj;
-
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
-
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
-			break;
-
-		i915_gem_object_move_to_inactive(obj);
-	}
-
 	if (unlikely(ring->trace_irq_seqno &&
 		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
 		ring->irq_put(ring);
@@ -2753,9 +2764,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 	if (list_empty(&vma->vma_link))
 		return 0;
 
@@ -2765,7 +2773,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		return 0;
 	}
 
-	if (obj->pin_count)
+	if (vma->pin_count)
 		return -EBUSY;
 
 	BUG_ON(obj->pages == NULL);
@@ -2787,12 +2795,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 	trace_i915_vma_unbind(vma);
 
-	if (obj->has_global_gtt_mapping)
-		i915_gem_gtt_unbind_object(obj);
-	if (obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
-		obj->has_aliasing_ppgtt_mapping = 0;
-	}
+	vma->unbind_vma(vma);
+
 	i915_gem_gtt_finish_object(obj);
 
 	list_del(&vma->mm_list);
@@ -2829,7 +2833,7 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
 	if (!i915_gem_obj_ggtt_bound(obj))
 		return 0;
 
-	if (obj->pin_count)
+	if (i915_gem_obj_to_ggtt(obj)->pin_count)
 		return -EBUSY;
 
 	BUG_ON(obj->pages == NULL);
@@ -2845,7 +2849,7 @@ int i915_gpu_idle(struct drm_device *dev)
 
 	/* Flush everything onto the inactive list. */
 	for_each_ring(ring, dev_priv, i) {
-		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
+		ret = i915_switch_context(ring, NULL, ring->default_context);
 		if (ret)
 			return ret;
 
@@ -3312,17 +3316,12 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
-	BUG_ON(!i915_is_ggtt(vm));
-
 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unpin;
 	}
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 search_free:
 	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 						  size, alignment,
@@ -3528,14 +3527,13 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct i915_vma *vma;
 	int ret;
 
 	if (obj->cache_level == cache_level)
 		return 0;
 
-	if (obj->pin_count) {
+	if (i915_gem_obj_is_pinned(obj)) {
 		DRM_DEBUG("can not change the cache level of pinned objects\n");
 		return -EBUSY;
 	}
@@ -3567,11 +3565,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				return ret;
 		}
 
-		if (obj->has_global_gtt_mapping)
-			i915_gem_gtt_bind_object(obj, cache_level);
-		if (obj->has_aliasing_ppgtt_mapping)
-			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-					       obj, cache_level);
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			vma->bind_vma(vma, cache_level, 0);
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3695,7 +3690,7 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
 	 * subtracting the potential reference by the user, any pin_count
 	 * remains, it must be due to another use by the display engine.
 	 */
-	return obj->pin_count - !!obj->user_pin_count;
+	return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count;
 }
 
 /*
@@ -3769,7 +3764,7 @@ err_unpin_display:
 void
 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
 {
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 	obj->pin_display = is_pin_display(obj);
 }
 
@@ -3899,21 +3894,22 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    bool map_and_fenceable,
 		    bool nonblocking)
 {
+	const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
 	struct i915_vma *vma;
 	int ret;
 
-	if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-		return -EBUSY;
-
 	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
 
 	vma = i915_gem_obj_to_vma(obj, vm);
 
 	if (vma) {
+		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+			return -EBUSY;
+
 		if ((alignment &&
 		     vma->node.start & (alignment - 1)) ||
 		    (map_and_fenceable && !obj->map_and_fenceable)) {
-			WARN(obj->pin_count,
+			WARN(vma->pin_count,
 			     "bo is already pinned with incorrect alignment:"
 			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
 			     " obj->map_and_fenceable=%d\n",
@@ -3927,34 +3923,34 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
 	}
 
 	if (!i915_gem_obj_bound(obj, vm)) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-
 		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
 						 map_and_fenceable,
 						 nonblocking);
 		if (ret)
 			return ret;
 
-		if (!dev_priv->mm.aliasing_ppgtt)
-			i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
 
-	if (!obj->has_global_gtt_mapping && map_and_fenceable)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+	vma = i915_gem_obj_to_vma(obj, vm);
+
+	vma->bind_vma(vma, obj->cache_level, flags);
 
-	obj->pin_count++;
+	i915_gem_obj_to_vma(obj, vm)->pin_count++;
 	obj->pin_mappable |= map_and_fenceable;
 
 	return 0;
 }
 
 void
-i915_gem_object_unpin(struct drm_i915_gem_object *obj)
+i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj)
 {
-	BUG_ON(obj->pin_count == 0);
-	BUG_ON(!i915_gem_obj_bound_any(obj));
+	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 
-	if (--obj->pin_count == 0)
+	BUG_ON(!vma);
+	BUG_ON(vma->pin_count == 0);
+	BUG_ON(!i915_gem_obj_ggtt_bound(obj));
+
+	if (--vma->pin_count == 0)
 		obj->pin_mappable = false;
 }
 
@@ -3966,6 +3962,9 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int ret;
 
+	if (INTEL_INFO(dev)->gen >= 6)
+		return -ENODEV;
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
@@ -4038,7 +4037,7 @@ i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
 	obj->user_pin_count--;
 	if (obj->user_pin_count == 0) {
 		obj->pin_filp = NULL;
-		i915_gem_object_unpin(obj);
+		i915_gem_object_ggtt_unpin(obj);
 	}
 
 out:
@@ -4118,7 +4117,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
-	if (obj->pin_count) {
+	if (i915_gem_obj_is_pinned(obj)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4229,12 +4228,11 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
-	obj->pin_count = 0;
-	/* NB: 0 or 1 elements */
-	WARN_ON(!list_empty(&obj->vma_list) &&
-		!list_is_singular(&obj->vma_list));
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
-		int ret = i915_vma_unbind(vma);
+		int ret;
+
+		vma->pin_count = 0;
+		ret = i915_vma_unbind(vma);
 		if (WARN_ON(ret == -ERESTARTSYS)) {
 			bool was_interruptible;
 
@@ -4283,41 +4281,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 	return NULL;
 }
 
-static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
-					      struct i915_address_space *vm)
-{
-	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
-	if (vma == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	INIT_LIST_HEAD(&vma->vma_link);
-	INIT_LIST_HEAD(&vma->mm_list);
-	INIT_LIST_HEAD(&vma->exec_list);
-	vma->vm = vm;
-	vma->obj = obj;
-
-	/* Keep GGTT vmas first to make debug easier */
-	if (i915_is_ggtt(vm))
-		list_add(&vma->vma_link, &obj->vma_list);
-	else
-		list_add_tail(&vma->vma_link, &obj->vma_list);
-
-	return vma;
-}
-
-struct i915_vma *
-i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
-				  struct i915_address_space *vm)
-{
-	struct i915_vma *vma;
-
-	vma = i915_gem_obj_to_vma(obj, vm);
-	if (!vma)
-		vma = __i915_gem_vma_create(obj, vm);
-
-	return vma;
-}
-
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
 	WARN_ON(vma->node.allocated);
@@ -4523,25 +4486,23 @@ i915_gem_init_hw(struct drm_device *dev)
 		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
 
 	/*
-	 * XXX: There was some w/a described somewhere suggesting loading
-	 * contexts before PPGTT.
+	 * XXX: Contexts should only be initialized once. Doing a switch to the
+	 * default context switch however is something we'd like to do after
+	 * reset or thaw (the latter may not actually be necessary for HW, but
+	 * goes with our code better). Context switching requires rings (for
+	 * the do_switch), but before enabling PPGTT. So don't move this.
 	 */
-	ret = i915_gem_context_init(dev);
+	ret = i915_gem_context_enable(dev_priv);
 	if (ret) {
-		i915_gem_cleanup_ringbuffer(dev);
-		DRM_ERROR("Context initialization failed %d\n", ret);
-		return ret;
-	}
-
-	if (dev_priv->mm.aliasing_ppgtt) {
-		ret = dev_priv->mm.aliasing_ppgtt->enable(dev);
-		if (ret) {
-			i915_gem_cleanup_aliasing_ppgtt(dev);
-			DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n");
-		}
+		DRM_ERROR("Context enable failed %d\n", ret);
+		goto err_out;
 	}
 
 	return 0;
+
+err_out:
+	i915_gem_cleanup_ringbuffer(dev);
+	return ret;
 }
 
 int i915_gem_init(struct drm_device *dev)
@@ -4560,10 +4521,16 @@ int i915_gem_init(struct drm_device *dev)
 
 	i915_gem_init_global_gtt(dev);
 
+	ret = i915_gem_context_init(dev);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret) {
-		i915_gem_cleanup_aliasing_ppgtt(dev);
+		WARN_ON(dev_priv->mm.aliasing_ppgtt);
+		i915_gem_context_fini(dev);
+		drm_mm_takedown(&dev_priv->gtt.base.mm);
 		return ret;
 	}
 
@@ -4658,14 +4625,16 @@ init_ring_lists(struct intel_ring_buffer *ring)
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
-static void i915_init_vm(struct drm_i915_private *dev_priv,
-			 struct i915_address_space *vm)
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm)
 {
+	if (!i915_is_ggtt(vm))
+		drm_mm_init(&vm->mm, vm->start, vm->total);
 	vm->dev = dev_priv->dev;
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
 	INIT_LIST_HEAD(&vm->global_link);
-	list_add(&vm->global_link, &dev_priv->vm_list);
+	list_add_tail(&vm->global_link, &dev_priv->vm_list);
 }
 
 void
@@ -4950,6 +4919,7 @@ i915_gem_file_idle_work_handler(struct work_struct *work)
 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv;
+	int ret;
 
 	DRM_DEBUG_DRIVER("\n");
 
@@ -4965,9 +4935,11 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
 			  i915_gem_file_idle_work_handler);
 
-	idr_init(&file_priv->context_idr);
+	ret = i915_gem_context_open(dev, file);
+	if (ret)
+		kfree(file_priv);
 
-	return 0;
+	return ret;
 }
 
 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
@@ -5014,7 +4986,7 @@ i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
 		if (obj->active)
 			continue;
 
-		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
+		if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
@@ -5031,7 +5003,8 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
 	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
 	struct i915_vma *vma;
 
-	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
+	if (!dev_priv->mm.aliasing_ppgtt ||
+	    vm == &dev_priv->mm.aliasing_ppgtt->base)
 		vm = &dev_priv->gtt.base;
 
 	BUG_ON(list_empty(&o->vma_list));
@@ -5072,7 +5045,8 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
 	struct i915_vma *vma;
 
-	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
+	if (!dev_priv->mm.aliasing_ppgtt ||
+	    vm == &dev_priv->mm.aliasing_ppgtt->base)
 		vm = &dev_priv->gtt.base;
 
 	BUG_ON(list_empty(&o->vma_list));
@@ -5127,7 +5101,7 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 		return NULL;
 
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
-	if (WARN_ON(vma->vm != obj_to_ggtt(obj)))
+	if (vma->vm != obj_to_ggtt(obj))
 		return NULL;
 
 	return vma;
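With the object-wide pin_count gone, i915_gem_object_pin() bumps the target VMA's 4-bit pin_count (capped at DRM_I915_GEM_OBJECT_MAX_PIN_COUNT = 0xf; the documented worst case, 1 + 1 + 1 + 2*2 pins, is 7) and i915_gem_object_ggtt_unpin() decrements the GGTT VMA's count. A compact model of the overflow/underflow guards, again with simplified stand-in types:

#include <assert.h>
#include <stdio.h>

#define MAX_PIN_COUNT 0xf	/* DRM_I915_GEM_OBJECT_MAX_PIN_COUNT */

struct vma {
	unsigned int pin_count : 4;	/* worst case is 7, cap is 15 */
};

/* Models the WARN_ON + -EBUSY path in i915_gem_object_pin(). */
static int vma_pin(struct vma *v)
{
	if (v->pin_count == MAX_PIN_COUNT)
		return -1;	/* the kernel returns -EBUSY here */
	v->pin_count++;
	return 0;
}

/* Models i915_gem_object_ggtt_unpin(): underflow is a programming bug. */
static void vma_unpin(struct vma *v)
{
	assert(v->pin_count > 0);	/* the kernel BUG_ON()s this */
	v->pin_count--;
}

int main(void)
{
	struct vma v = { 0 };

	/* pwrite + pin_ioctl + execbuf + 2 crtcs x 2 buffers = 7 pins */
	for (int i = 0; i < 7; i++)
		assert(vma_pin(&v) == 0);
	printf("pin_count = %u\n", v.pin_count);
	while (v.pin_count)
		vma_unpin(&v);
	return 0;
}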

+ 313 - 110
drivers/gpu/drm/i915/i915_gem_context.c

@@ -93,11 +93,19 @@
  * I've seen in a spec to date, and that was a workaround for a non-shipping
  * part. It should be safe to decrease this, but it's more future proof as is.
  */
-#define CONTEXT_ALIGN (64<<10)
+#define GEN6_CONTEXT_ALIGN (64<<10)
+#define GEN7_CONTEXT_ALIGN 4096
 
-static struct i915_hw_context *
-i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
-static int do_switch(struct i915_hw_context *to);
+static int do_switch(struct intel_ring_buffer *ring,
+		     struct i915_hw_context *to);
+
+static size_t get_context_alignment(struct drm_device *dev)
+{
+	if (IS_GEN6(dev))
+		return GEN6_CONTEXT_ALIGN;
+
+	return GEN7_CONTEXT_ALIGN;
+}
 
 static int get_context_size(struct drm_device *dev)
 {
@@ -131,14 +139,43 @@ void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_hw_context *ctx = container_of(ctx_ref,
 						   typeof(*ctx), ref);
+	struct i915_hw_ppgtt *ppgtt = NULL;
 
-	list_del(&ctx->link);
+	/* We refcount even the aliasing PPGTT to keep the code symmetric */
+	if (USES_ALIASING_PPGTT(ctx->obj->base.dev))
+		ppgtt = ctx_to_ppgtt(ctx);
+
+	/* XXX: Free up the object before tearing down the address space, in
+	 * case we're bound in the PPGTT */
 	drm_gem_object_unreference(&ctx->obj->base);
+
+	if (ppgtt)
+		kref_put(&ppgtt->ref, ppgtt_release);
+	list_del(&ctx->link);
 	kfree(ctx);
 }
 
+static struct i915_hw_ppgtt *
+create_vm_for_ctx(struct drm_device *dev, struct i915_hw_context *ctx)
+{
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	ret = i915_gem_init_ppgtt(dev, ppgtt);
+	if (ret) {
+		kfree(ppgtt);
+		return ERR_PTR(ret);
+	}
+
+	return ppgtt;
+}
+
 static struct i915_hw_context *
-create_hw_context(struct drm_device *dev,
+__create_hw_context(struct drm_device *dev,
 		  struct drm_i915_file_private *file_priv)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -166,18 +203,13 @@ create_hw_context(struct drm_device *dev,
 			goto err_out;
 	}
 
-	/* The ring associated with the context object is handled by the normal
-	 * object tracking code. We give an initial ring value simple to pass an
-	 * assertion in the context switch code.
-	 */
-	ctx->ring = &dev_priv->ring[RCS];
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 
 	/* Default context will never have a file_priv */
 	if (file_priv == NULL)
 		return ctx;
 
-	ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID + 1, 0,
+	ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID, 0,
 			GFP_KERNEL);
 	if (ret < 0)
 		goto err_out;
@@ -198,7 +230,7 @@ err_out:
 
 static inline bool is_default_context(struct i915_hw_context *ctx)
 {
-	return (ctx == ctx->ring->default_context);
+	return (ctx->id == DEFAULT_CONTEXT_ID);
 }
 
 /**
@@ -206,57 +238,129 @@ static inline bool is_default_context(struct i915_hw_context *ctx)
  * context state of the GPU for applications that don't utilize HW contexts, as
  * well as an idle case.
  */
-static int create_default_context(struct drm_i915_private *dev_priv)
+static struct i915_hw_context *
+i915_gem_create_context(struct drm_device *dev,
+			struct drm_i915_file_private *file_priv,
+			bool create_vm)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_context *ctx;
-	int ret;
+	int ret = 0;
 
-	BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	ctx = create_hw_context(dev_priv->dev, NULL);
+	ctx = __create_hw_context(dev, file_priv);
 	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	/* We may need to do things with the shrinker which require us to
-	 * immediately switch back to the default context. This can cause a
-	 * problem as pinning the default context also requires GTT space which
-	 * may not be available. To avoid this we always pin the
-	 * default context.
-	 */
-	ret = i915_gem_obj_ggtt_pin(ctx->obj, CONTEXT_ALIGN, false, false);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
-		goto err_destroy;
-	}
-
-	ret = do_switch(ctx);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Switch failed %d\n", ret);
-		goto err_unpin;
-	}
+		return ctx;
 
-	dev_priv->ring[RCS].default_context = ctx;
+	if (create_vm) {
+		struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx);
+
+		if (IS_ERR_OR_NULL(ppgtt)) {
+			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
+					 PTR_ERR(ppgtt));
+			ret = PTR_ERR(ppgtt);
+			goto err_destroy;
+		} else
+			ctx->vm = &ppgtt->base;
+
+		/* This case is reserved for the global default context and
+		 * should only happen once. */
+		if (!file_priv) {
+			if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) {
+				ret = -EEXIST;
+				goto err_destroy;
+			}
+
+			dev_priv->mm.aliasing_ppgtt = ppgtt;
+
+			/* We may need to do things with the shrinker which
+			 * require us to immediately switch back to the default
+			 * context. This can cause a problem as pinning the
+			 * default context also requires GTT space which may not
+			 * be available. To avoid this we always pin the default
+			 * context.
+			 */
+			ret = i915_gem_obj_ggtt_pin(ctx->obj,
+						    get_context_alignment(dev),
+						    false, false);
+			if (ret) {
+				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
+				goto err_destroy;
+			}
+
+			ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
+		}
+	} else if (USES_ALIASING_PPGTT(dev)) {
+		/* For platforms which only have aliasing PPGTT, we fake the
+		 * address space and refcounting. */
+		ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
+		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
+	} else
+		ctx->vm = &dev_priv->gtt.base;
 
-	DRM_DEBUG_DRIVER("Default HW context loaded\n");
-	return 0;
+	return ctx;
 
-err_unpin:
-	i915_gem_object_unpin(ctx->obj);
 err_destroy:
 	i915_gem_context_unreference(ctx);
-	return ret;
+	return ERR_PTR(ret);
+}
+
+void i915_gem_context_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	int i;
+
+	if (!HAS_HW_CONTEXTS(dev))
+		return;
+
+	/* Prevent the hardware from restoring the last context (which hung) on
+	 * the next switch */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct i915_hw_context *dctx;
+		if (!(INTEL_INFO(dev)->ring_mask & (1<<i)))
+			continue;
+
+		/* Do a fake switch to the default context */
+		ring = &dev_priv->ring[i];
+		dctx = ring->default_context;
+		if (WARN_ON(!dctx))
+			continue;
+
+		if (!ring->last_context)
+			continue;
+
+		if (ring->last_context == dctx)
+			continue;
+
+		if (i == RCS) {
+			WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
+						      get_context_alignment(dev),
+						      false, false));
+			/* Fake a finish/inactive */
+			dctx->obj->base.write_domain = 0;
+			dctx->obj->active = 0;
+		}
+
+		i915_gem_context_unreference(ring->last_context);
+		i915_gem_context_reference(dctx);
+		ring->last_context = dctx;
+	}
 }
 
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
+	struct intel_ring_buffer *ring;
+	int i;
 
 	if (!HAS_HW_CONTEXTS(dev))
 		return 0;
 
-	/* If called from reset, or thaw... we've been here already */
-	if (dev_priv->ring[RCS].default_context)
+	/* Init should only be called once per module load. Eventually the
+	 * restriction on the context_disabled check can be loosened. */
+	if (WARN_ON(dev_priv->ring[RCS].default_context))
 		return 0;
 
 	dev_priv->hw_context_size = round_up(get_context_size(dev), 4096);
@@ -266,11 +370,23 @@ int i915_gem_context_init(struct drm_device *dev)
 		return -E2BIG;
 	}
 
-	ret = create_default_context(dev_priv);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %d\n",
-				 ret);
-		return ret;
+	dev_priv->ring[RCS].default_context =
+		i915_gem_create_context(dev, NULL, USES_ALIASING_PPGTT(dev));
+
+	if (IS_ERR_OR_NULL(dev_priv->ring[RCS].default_context)) {
+		DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %ld\n",
+				 PTR_ERR(dev_priv->ring[RCS].default_context));
+		return PTR_ERR(dev_priv->ring[RCS].default_context);
+	}
+
+	for (i = RCS + 1; i < I915_NUM_RINGS; i++) {
+		if (!(INTEL_INFO(dev)->ring_mask & (1<<i)))
+			continue;
+
+		ring = &dev_priv->ring[i];
+
+		/* NB: RCS will hold a ref for all rings */
+		ring->default_context = dev_priv->ring[RCS].default_context;
 	}
 
 	DRM_DEBUG_DRIVER("HW context support initialized\n");
@@ -281,6 +397,7 @@ void i915_gem_context_fini(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_context *dctx = dev_priv->ring[RCS].default_context;
+	int i;
 
 	if (!HAS_HW_CONTEXTS(dev))
 		return;
@@ -300,59 +417,125 @@ void i915_gem_context_fini(struct drm_device *dev)
 	if (dev_priv->ring[RCS].last_context == dctx) {
 		/* Fake switch to NULL context */
 		WARN_ON(dctx->obj->active);
-		i915_gem_object_unpin(dctx->obj);
+		i915_gem_object_ggtt_unpin(dctx->obj);
 		i915_gem_context_unreference(dctx);
+		dev_priv->ring[RCS].last_context = NULL;
+	}
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+		if (!(INTEL_INFO(dev)->ring_mask & (1<<i)))
+			continue;
+
+		if (ring->last_context)
+			i915_gem_context_unreference(ring->last_context);
+
+		ring->default_context = NULL;
+		ring->last_context = NULL;
 	}
 
-	i915_gem_object_unpin(dctx->obj);
+	i915_gem_object_ggtt_unpin(dctx->obj);
 	i915_gem_context_unreference(dctx);
-	dev_priv->ring[RCS].default_context = NULL;
-	dev_priv->ring[RCS].last_context = NULL;
+	dev_priv->mm.aliasing_ppgtt = NULL;
+}
+
+int i915_gem_context_enable(struct drm_i915_private *dev_priv)
+{
+	struct intel_ring_buffer *ring;
+	int ret, i;
+
+	if (!HAS_HW_CONTEXTS(dev_priv->dev))
+		return 0;
+
+	/* This is the only place the aliasing PPGTT gets enabled, which means
+	 * it has to happen before we bail on reset */
+	if (dev_priv->mm.aliasing_ppgtt) {
+		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+		ppgtt->enable(ppgtt);
+	}
+
+	/* FIXME: We should make this work, even in reset */
+	if (i915_reset_in_progress(&dev_priv->gpu_error))
+		return 0;
+
+	BUG_ON(!dev_priv->ring[RCS].default_context);
+
+	for_each_ring(ring, dev_priv, i) {
+		ret = do_switch(ring, ring->default_context);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
 {
 	struct i915_hw_context *ctx = p;
 
-	BUG_ON(id == DEFAULT_CONTEXT_ID);
+	/* Ignore the default context because close will handle it */
+	if (is_default_context(ctx))
+		return 0;
 
 	i915_gem_context_unreference(ctx);
 	return 0;
 }
 
-struct i915_ctx_hang_stats *
-i915_gem_context_get_hang_stats(struct drm_device *dev,
-				struct drm_file *file,
-				u32 id)
+int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *ctx;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (id == DEFAULT_CONTEXT_ID)
-		return &file_priv->hang_stats;
+	if (!HAS_HW_CONTEXTS(dev)) {
+		/* Cheat for hang stats */
+		file_priv->private_default_ctx =
+			kzalloc(sizeof(struct i915_hw_context), GFP_KERNEL);
+		file_priv->private_default_ctx->vm = &dev_priv->gtt.base;
+		return 0;
+	}
 
-	if (!HAS_HW_CONTEXTS(dev))
-		return ERR_PTR(-ENOENT);
+	idr_init(&file_priv->context_idr);
 
-	ctx = i915_gem_context_get(file->driver_priv, id);
-	if (ctx == NULL)
-		return ERR_PTR(-ENOENT);
+	mutex_lock(&dev->struct_mutex);
+	file_priv->private_default_ctx =
+		i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
+	mutex_unlock(&dev->struct_mutex);
+
+	if (IS_ERR(file_priv->private_default_ctx)) {
+		idr_destroy(&file_priv->context_idr);
+		return PTR_ERR(file_priv->private_default_ctx);
+	}
 
-	return &ctx->hang_stats;
+	return 0;
 }
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
+	if (!HAS_HW_CONTEXTS(dev)) {
+		kfree(file_priv->private_default_ctx);
+		return;
+	}
+
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
+	i915_gem_context_unreference(file_priv->private_default_ctx);
 	idr_destroy(&file_priv->context_idr);
 }
 
-static struct i915_hw_context *
+struct i915_hw_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 {
-	return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id);
+	struct i915_hw_context *ctx;
+
+	if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev))
+		return file_priv->private_default_ctx;
+
+	ctx = (struct i915_hw_context *)idr_find(&file_priv->context_idr, id);
+	if (!ctx)
+		return ERR_PTR(-ENOENT);
+
+	return ctx;
 }
 
 static inline int
@@ -403,21 +586,31 @@ mi_set_context(struct intel_ring_buffer *ring,
 	return ret;
 }
 
-static int do_switch(struct i915_hw_context *to)
+static int do_switch(struct intel_ring_buffer *ring,
+		     struct i915_hw_context *to)
 {
-	struct intel_ring_buffer *ring = to->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct i915_hw_context *from = ring->last_context;
+	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	int ret, i;
 
-	BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0);
+	if (from != NULL && ring == &dev_priv->ring[RCS]) {
+		BUG_ON(from->obj == NULL);
+		BUG_ON(!i915_gem_obj_is_pinned(from->obj));
+	}
 
-	if (from == to && !to->remap_slice)
+	if (from == to && from->last_ring == ring && !to->remap_slice)
 		return 0;
 
-	ret = i915_gem_obj_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false);
-	if (ret)
-		return ret;
+	/* Trying to pin first makes error handling easier. */
+	if (ring == &dev_priv->ring[RCS]) {
+		ret = i915_gem_obj_ggtt_pin(to->obj,
+					    get_context_alignment(ring->dev),
+					    false, false);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
@@ -426,6 +619,18 @@ static int do_switch(struct i915_hw_context *to)
 	 */
 	from = ring->last_context;
 
+	if (USES_FULL_PPGTT(ring->dev)) {
+		ret = ppgtt->switch_mm(ppgtt, ring, false);
+		if (ret)
+			goto unpin_out;
+	}
+
+	if (ring != &dev_priv->ring[RCS]) {
+		if (from)
+			i915_gem_context_unreference(from);
+		goto done;
+	}
+
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -435,22 +640,21 @@ static int do_switch(struct i915_hw_context *to)
 	 * XXX: We need a real interface to do this instead of trickery.
 	 */
 	ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
-	if (ret) {
-		i915_gem_object_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
-	if (!to->obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
+	if (!to->obj->has_global_gtt_mapping) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(to->obj,
+							   &dev_priv->gtt.base);
+		vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
+	}
 
 	if (!to->is_initialized || is_default_context(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
 
 	ret = mi_set_context(ring, to, hw_flags);
-	if (ret) {
-		i915_gem_object_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	for (i = 0; i < MAX_L3_SLICES; i++) {
 		if (!(to->remap_slice & (1<<i)))
@@ -484,15 +688,23 @@ static int do_switch(struct i915_hw_context *to)
 		BUG_ON(from->obj->ring != ring);
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_unpin(from->obj);
+		i915_gem_object_ggtt_unpin(from->obj);
 		i915_gem_context_unreference(from);
 	}
 
+	to->is_initialized = true;
+
+done:
 	i915_gem_context_reference(to);
 	ring->last_context = to;
-	to->is_initialized = true;
+	to->last_ring = ring;
 
 	return 0;
+
+unpin_out:
+	if (ring->id == RCS)
+		i915_gem_object_ggtt_unpin(to->obj);
+	return ret;
 }
 
 /**
@@ -508,31 +720,19 @@ static int do_switch(struct i915_hw_context *to)
  */
 int i915_switch_context(struct intel_ring_buffer *ring,
 			struct drm_file *file,
-			int to_id)
+			struct i915_hw_context *to)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct i915_hw_context *to;
-
-	if (!HAS_HW_CONTEXTS(ring->dev))
-		return 0;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 
-	if (ring != &dev_priv->ring[RCS])
-		return 0;
-
-	if (to_id == DEFAULT_CONTEXT_ID) {
-		to = ring->default_context;
-	} else {
-		if (file == NULL)
-			return -EINVAL;
+	BUG_ON(file && to == NULL);
 
-		to = i915_gem_context_get(file->driver_priv, to_id);
-		if (to == NULL)
-			return -ENOENT;
-	}
+	/* We have the fake context, but don't support switching. */
+	if (!HAS_HW_CONTEXTS(ring->dev))
+		return 0;
 
-	return do_switch(to);
+	return do_switch(ring, to);
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -553,7 +753,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = create_hw_context(dev, file_priv);
+	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
@@ -575,14 +775,17 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!(dev->driver->driver_features & DRIVER_GEM))
 		return -ENODEV;
 
+	if (args->ctx_id == DEFAULT_CONTEXT_ID)
+		return -ENOENT;
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
 
 	ctx = i915_gem_context_get(file_priv, args->ctx_id);
-	if (!ctx) {
+	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
-		return -ENOENT;
+		return PTR_ERR(ctx);
 	}
 
 	idr_remove(&ctx->file_priv->context_idr, ctx->id);
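Each open DRM file now gets a kernel-created private default context, allocated at DEFAULT_CONTEXT_ID (0) in the per-file idr, and the destroy ioctl refuses that id up front. A sketch of the userspace-visible behavior — the device node path is an assumption and error handling is trimmed:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* assumed device node */

	/* Destroying the per-file default context is now refused. */
	struct drm_i915_gem_context_destroy destroy = { .ctx_id = 0 };
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy) < 0)
		printf("destroy ctx 0: %s\n", strerror(errno)); /* ENOENT */

	/* Explicitly created contexts get ids > 0 (full-PPGTT VMs where
	 * supported, per i915_gem_create_context() above). */
	struct drm_i915_gem_context_create create = { 0 };
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create) == 0)
		printf("new ctx id %u\n", create.ctx_id);
	return 0;
}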

+ 3 - 2
drivers/gpu/drm/i915/i915_gem_evict.c

@@ -36,7 +36,8 @@
 static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
 {
-	if (vma->obj->pin_count)
+	/* Freeing up memory requires no VMAs are pinned */
+	if (i915_gem_obj_is_pinned(vma->obj))
 		return false;
 
 	if (WARN_ON(!list_empty(&vma->exec_list)))
@@ -207,7 +208,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 	}
 
 	list_for_each_entry_safe(vma, next, &vm->inactive_list, mm_list)
-		if (vma->obj->pin_count == 0)
+		if (vma->pin_count == 0)
 			WARN_ON(i915_vma_unbind(vma));
 
 	return 0;

+ 83 - 79
drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -91,6 +91,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 	       struct i915_address_space *vm,
 	       struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct list_head objects;
 	int i, ret;
@@ -125,6 +126,20 @@ eb_lookup_vmas(struct eb_vmas *eb,
 	i = 0;
 	while (!list_empty(&objects)) {
 		struct i915_vma *vma;
+		struct i915_address_space *bind_vm = vm;
+
+		if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
+		    USES_FULL_PPGTT(vm->dev)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/* If we have secure dispatch, or the userspace assures us that
+		 * they know what they're doing, use the GGTT VM.
+		 */
+		if (((args->flags & I915_EXEC_SECURE) &&
+		    (i == (args->buffer_count - 1))))
+			bind_vm = &dev_priv->gtt.base;
 
 		obj = list_first_entry(&objects,
 				       struct drm_i915_gem_object,
@@ -138,7 +153,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
+		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
 		if (IS_ERR(vma)) {
 			DRM_DEBUG("Failed to lookup VMA\n");
 			ret = PTR_ERR(vma);
@@ -217,7 +232,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 		i915_gem_object_unpin_fence(obj);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		i915_gem_object_unpin(obj);
+		vma->pin_count--;
 
 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
@@ -327,8 +342,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
-				   struct drm_i915_gem_relocation_entry *reloc,
-				   struct i915_address_space *vm)
+				   struct drm_i915_gem_relocation_entry *reloc)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_gem_object *target_obj;
@@ -352,8 +366,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (unlikely(IS_GEN6(dev) &&
 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 	    !target_i915_obj->has_global_gtt_mapping)) {
-		i915_gem_gtt_bind_object(target_i915_obj,
-					 target_i915_obj->cache_level);
+		struct i915_vma *vma =
+			list_first_entry(&target_i915_obj->vma_list,
+					 typeof(*vma), vma_link);
+		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
 	}
 
 	/* Validate that the target is in a valid r/w GPU domain */
@@ -451,8 +467,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r,
-								 vma->vm);
+			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
 			if (ret)
 				return ret;
 
@@ -481,8 +496,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 	int i, ret;
 
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i],
-							 vma->vm);
+		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
 		if (ret)
 			return ret;
 	}
@@ -527,11 +541,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_ring_buffer *ring,
 				bool *need_reloc)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	bool need_fence, need_mappable;
-	struct drm_i915_gem_object *obj = vma->obj;
+	u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
+		!vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
 	int ret;
 
 	need_fence =
@@ -560,14 +575,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		}
 	}
 
-	/* Ensure ppgtt mapping exists if needed */
-	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-				       obj, obj->cache_level);
-
-		obj->has_aliasing_ppgtt_mapping = 1;
-	}
-
 	if (entry->offset != vma->node.start) {
 		entry->offset = vma->node.start;
 		*need_reloc = true;
@@ -578,9 +585,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
 	}
 
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
-	    !obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+	vma->bind_vma(vma, obj->cache_level, flags);
 
 	return 0;
 }
@@ -900,22 +905,27 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 	return 0;
 }
 
-static int
+static struct i915_hw_context *
 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
-			  const u32 ctx_id)
+			  struct intel_ring_buffer *ring, const u32 ctx_id)
 {
+	struct i915_hw_context *ctx = NULL;
 	struct i915_ctx_hang_stats *hs;
 
-	hs = i915_gem_context_get_hang_stats(dev, file, ctx_id);
-	if (IS_ERR(hs))
-		return PTR_ERR(hs);
+	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID)
+		return ERR_PTR(-EINVAL);
+
+	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
+	if (IS_ERR(ctx))
+		return ctx;
 
+	hs = &ctx->hang_stats;
 	if (hs->banned) {
 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
-		return -EIO;
+		return ERR_PTR(-EIO);
 	}
 
-	return 0;
+	return ctx;
 }
 
 static void
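
Returning the context itself rather than its hang stats leans on the kernel's ERR_PTR convention: small negative errnos travel inside the pointer value. A self-contained model of the helpers (mirroring linux/err.h):

    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
	    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
	    void *ctx = ERR_PTR(-22);	/* what -EINVAL looks like */

	    if (IS_ERR(ctx))
		    printf("validate_context failed: %ld\n", PTR_ERR(ctx));
	    return 0;
    }
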
@@ -939,7 +949,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
 			obj->last_write_seqno = intel_ring_get_seqno(ring);
-			if (obj->pin_count) /* check for potential scanout */
+			/* check for potential scanout */
+			if (i915_gem_obj_ggtt_bound(obj) &&
+			    i915_gem_obj_to_ggtt(obj)->pin_count)
 				intel_mark_fb_busy(obj, ring);
 		}
 
@@ -989,16 +1001,17 @@ static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct i915_address_space *vm)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct eb_vmas *eb;
 	struct drm_i915_gem_object *batch_obj;
 	struct drm_clip_rect *cliprects = NULL;
 	struct intel_ring_buffer *ring;
+	struct i915_hw_context *ctx;
+	struct i915_address_space *vm;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start, exec_len;
+	u32 exec_start = args->batch_start_offset, exec_len;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1020,41 +1033,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		flags |= I915_DISPATCH_PINNED;
 
-	switch (args->flags & I915_EXEC_RING_MASK) {
-	case I915_EXEC_DEFAULT:
-	case I915_EXEC_RENDER:
-		ring = &dev_priv->ring[RCS];
-		break;
-	case I915_EXEC_BSD:
-		ring = &dev_priv->ring[VCS];
-		if (ctx_id != DEFAULT_CONTEXT_ID) {
-			DRM_DEBUG("Ring %s doesn't support contexts\n",
-				  ring->name);
-			return -EPERM;
-		}
-		break;
-	case I915_EXEC_BLT:
-		ring = &dev_priv->ring[BCS];
-		if (ctx_id != DEFAULT_CONTEXT_ID) {
-			DRM_DEBUG("Ring %s doesn't support contexts\n",
-				  ring->name);
-			return -EPERM;
-		}
-		break;
-	case I915_EXEC_VEBOX:
-		ring = &dev_priv->ring[VECS];
-		if (ctx_id != DEFAULT_CONTEXT_ID) {
-			DRM_DEBUG("Ring %s doesn't support contexts\n",
-				  ring->name);
-			return -EPERM;
-		}
-		break;
-
-	default:
+	if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %d\n",
 			  (int)(args->flags & I915_EXEC_RING_MASK));
 		return -EINVAL;
 	}
+
+	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
+		ring = &dev_priv->ring[RCS];
+	else
+		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
+
 	if (!intel_ring_initialized(ring)) {
 		DRM_DEBUG("execbuf with invalid ring: %d\n",
 			  (int)(args->flags & I915_EXEC_RING_MASK));
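
The switch statement collapses into index arithmetic because the uapi ring selectors are defined as the driver's ring index plus one, with DEFAULT aliasing the render ring. A standalone sketch of the mapping:

    #include <stdio.h>

    #define I915_EXEC_RING_MASK 0x7		/* from i915_drm.h */
    #define I915_EXEC_DEFAULT   0
    #define I915_EXEC_BSD       2

    enum { RCS = 0, VCS, BCS, VECS, NUM_RINGS };	/* driver ring indices */

    static int exec_flags_to_ring(unsigned int flags)
    {
	    unsigned int val = flags & I915_EXEC_RING_MASK;

	    if (val > NUM_RINGS)
		    return -1;			/* -EINVAL in the driver */
	    return val == I915_EXEC_DEFAULT ? RCS : (int)(val - 1);
    }

    int main(void)
    {
	    printf("DEFAULT -> ring %d\n", exec_flags_to_ring(I915_EXEC_DEFAULT)); /* 0 (RCS) */
	    printf("BSD     -> ring %d\n", exec_flags_to_ring(I915_EXEC_BSD));     /* 1 (VCS) */
	    return 0;
    }
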
@@ -1136,11 +1125,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto pre_mutex_err;
 	}
 
-	ret = i915_gem_validate_context(dev, file, ctx_id);
-	if (ret) {
+	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
+	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
+		ret = PTR_ERR(ctx);
 		goto pre_mutex_err;
-	}
+	}
+
+	i915_gem_context_reference(ctx);
+
+	vm = ctx->vm;
+	if (!USES_FULL_PPGTT(dev))
+		vm = &dev_priv->gtt.base;
 
 	eb = eb_create(args);
 	if (eb == NULL) {
@@ -1187,14 +1183,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
-		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
+	if (flags & I915_DISPATCH_SECURE &&
+	    !batch_obj->has_global_gtt_mapping) {
+		/* When we have multiple VMs, we'll need to make sure that we
+		 * allocate space first */
+		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
+		BUG_ON(!vma);
+		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
+	}
+
+	if (flags & I915_DISPATCH_SECURE)
+		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+	else
+		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
 		goto err;
 
-	ret = i915_switch_context(ring, file, ctx_id);
+	ret = i915_switch_context(ring, file, ctx);
 	if (ret)
 		goto err;
 
@@ -1219,8 +1226,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
-	exec_start = i915_gem_obj_offset(batch_obj, vm) +
-		args->batch_start_offset;
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
@@ -1249,6 +1255,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
 err:
+	/* the request owns the ref now */
+	i915_gem_context_unreference(ctx);
 	eb_destroy(eb);
 
 	mutex_unlock(&dev->struct_mutex);
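
The reference taken after validation pairs with the unreference at the err: label, where the comment notes the request now holds its own. A reduced model of the handoff, with a plain counter standing in for the driver's kref:

    struct ctx { int ref; };

    static void ctx_get(struct ctx *c) { c->ref++; }
    static void ctx_put(struct ctx *c) { c->ref--; }

    /* do_execbuffer(), reduced: hold a reference across submission so the
     * context cannot be freed mid-way; by the time we drop ours, the
     * request created during submission is assumed to hold its own. */
    static int submit(struct ctx *c)
    {
	    int ret;

	    ctx_get(c);
	    ret = 0;		/* ... relocate, switch context, dispatch ... */
	    ctx_put(c);		/* the request owns the ref now */
	    return ret;
    }
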
@@ -1270,7 +1278,6 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -1326,8 +1333,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	exec2.flags = I915_EXEC_RENDER;
 	i915_execbuffer2_set_context_id(exec2, 0);
 
-	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
-				     &dev_priv->gtt.base);
+	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		for (i = 0; i < args->buffer_count; i++)
@@ -1353,7 +1359,6 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
 	int ret;
@@ -1384,8 +1389,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
-				     &dev_priv->gtt.base);
+	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		ret = copy_to_user(to_user_ptr(args->buffers_ptr),

+ 510 - 163
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -22,6 +22,7 @@
  *
  */
 
+#include <linux/seq_file.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -70,6 +71,12 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
 
+static void ppgtt_bind_vma(struct i915_vma *vma,
+			   enum i915_cache_level cache_level,
+			   u32 flags);
+static void ppgtt_unbind_vma(struct i915_vma *vma);
+static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
+
 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
 					     enum i915_cache_level level,
 					     bool valid)
@@ -199,12 +206,19 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
-			   uint64_t val)
+			   uint64_t val, bool synchronous)
 {
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	int ret;
 
 	BUG_ON(entry >= 4);
 
+	if (synchronous) {
+		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
+		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
+		return 0;
+	}
+
 	ret = intel_ring_begin(ring, 6);
 	if (ret)
 		return ret;
@@ -220,36 +234,23 @@ static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
 	return 0;
 }
 
-static int gen8_ppgtt_enable(struct drm_device *dev)
+static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			  struct intel_ring_buffer *ring,
+			  bool synchronous)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i, j, ret;
+	int i, ret;
 
 	/* bit of a hack to find the actual last used pd */
 	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
 
-	for_each_ring(ring, dev_priv, j) {
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	}
-
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
-		for_each_ring(ring, dev_priv, j) {
-			ret = gen8_write_pdp(ring, i, addr);
-			if (ret)
-				goto err_out;
-		}
+		ret = gen8_write_pdp(ring, i, addr, synchronous);
+		if (ret)
+			return ret;
 	}
-	return 0;
 
-err_out:
-	for_each_ring(ring, dev_priv, j)
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
-	return ret;
+	return 0;
 }
 
 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
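
Both the new synchronous MMIO path and the ring path in gen8_write_pdp() program a 64-bit page-directory address through a pair of 32-bit registers. The dword split they share, as a standalone helper:

    #include <stdint.h>

    static void split_pdp(uint64_t val, uint32_t *udw, uint32_t *ldw)
    {
	    *udw = (uint32_t)(val >> 32);	/* GEN8_RING_PDP_UDW payload */
	    *ldw = (uint32_t)val;		/* GEN8_RING_PDP_LDW payload */
    }
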
@@ -324,6 +325,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i, j;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&vm->mm);
 
 	for (i = 0; i < ppgtt->num_pd_pages ; i++) {
@@ -386,6 +388,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
 	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
 	ppgtt->enable = gen8_ppgtt_enable;
+	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
@@ -458,6 +461,62 @@ err_out:
 	return ret;
 }
 
+static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
+{
+	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
+	struct i915_address_space *vm = &ppgtt->base;
+	gen6_gtt_pte_t __iomem *pd_addr;
+	gen6_gtt_pte_t scratch_pte;
+	uint32_t pd_entry;
+	int pte, pde;
+
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
+
+	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
+		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
+
+	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
+		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
+	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
+		u32 expected;
+		gen6_gtt_pte_t *pt_vaddr;
+		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
+		pd_entry = readl(pd_addr + pde);
+		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
+
+		if (pd_entry != expected)
+			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
+				   pde,
+				   pd_entry,
+				   expected);
+		seq_printf(m, "\tPDE: %x\n", pd_entry);
+
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
+		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte += 4) {
+			unsigned long va =
+				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+				(pte * PAGE_SIZE);
+			int i;
+			bool found = false;
+			for (i = 0; i < 4; i++)
+				if (pt_vaddr[pte + i] != scratch_pte)
+					found = true;
+			if (!found)
+				continue;
+
+			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
+			for (i = 0; i < 4; i++) {
+				if (pt_vaddr[pte + i] != scratch_pte)
+					seq_printf(m, " %08x", pt_vaddr[pte + i]);
+				else
+					seq_puts(m, "  SCRATCH ");
+			}
+			seq_puts(m, "\n");
+		}
+		kunmap_atomic(pt_vaddr);
+	}
+}
+
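
The dump reconstructs each PTE's virtual address from its (pde, pte) coordinates: a gen6 page table holds 1024 entries of 4 KiB pages, so each PDE spans 4 MiB. The arithmetic, checked standalone:

    #include <assert.h>

    #define PAGE_SIZE             4096
    #define I915_PPGTT_PT_ENTRIES 1024	/* gen6 value */

    static unsigned long gen6_pte_va(unsigned long pde, unsigned long pte)
    {
	    return pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES + pte * PAGE_SIZE;
    }

    int main(void)
    {
	    assert(gen6_pte_va(0, 1) == 0x1000);	/* second page */
	    assert(gen6_pte_va(1, 0) == 0x400000);	/* 4 MiB: one full PDE */
	    return 0;
    }
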
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
@@ -480,61 +539,221 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 	readl(pd_addr);
 }
 
-static int gen6_ppgtt_enable(struct drm_device *dev)
+static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t pd_offset;
+	BUG_ON(ppgtt->pd_offset & 0x3f);
+
+	return (ppgtt->pd_offset / 64) << 16;
+}
+
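
PP_DIR_BASE takes the page-directory offset in 64-byte cachelines at bit 16, so the encode is a net left shift by 10; that is also why the error-capture code later compares the register shifted right by 10 against pd_offset. Round-tripped standalone:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t encode_pd_offset(uint32_t pd_offset)
    {
	    assert((pd_offset & 0x3f) == 0);	/* BUG_ON in the driver */
	    return (pd_offset / 64) << 16;	/* == pd_offset << 10 */
    }

    int main(void)
    {
	    uint32_t pd_offset = 0x40000;

	    assert(encode_pd_offset(pd_offset) >> 10 == pd_offset);
	    return 0;
    }
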
+static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			 struct intel_ring_buffer *ring,
+			 bool synchronous)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/* If we're in reset, we can assume the GPU is sufficiently idle to
+	 * manually frob these bits. Ideally we could use the ring functions,
+	 * except our error handling makes it quite difficult (can't use
+	 * intel_ring_begin, ring->flush, or intel_ring_advance)
+	 *
+	 * FIXME: We should try not to special case reset
+	 */
+	if (synchronous ||
+	    i915_reset_in_progress(&dev_priv->gpu_error)) {
+		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
+		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+		POSTING_READ(RING_PP_DIR_BASE(ring));
+		return 0;
+	}
+
+	/* NB: TLBs must be flushed and invalidated before a switch */
+	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
+	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit(ring, PP_DIR_DCLV_2G);
+	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit(ring, get_pd_offset(ppgtt));
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			  struct intel_ring_buffer *ring,
+			  bool synchronous)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/* If we're in reset, we can assume the GPU is sufficiently idle to
+	 * manually frob these bits. Ideally we could use the ring functions,
+	 * except our error handling makes it quite difficult (can't use
+	 * intel_ring_begin, ring->flush, or intel_ring_advance)
+	 *
+	 * FIXME: We should try not to special case reset
+	 */
+	if (synchronous ||
+	    i915_reset_in_progress(&dev_priv->gpu_error)) {
+		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
+		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+		POSTING_READ(RING_PP_DIR_BASE(ring));
+		return 0;
+	}
+
+	/* NB: TLBs must be flushed and invalidated before a switch */
+	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
+	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit(ring, PP_DIR_DCLV_2G);
+	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit(ring, get_pd_offset(ppgtt));
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	/* XXX: RCS is the only one to auto invalidate the TLBs? */
+	if (ring->id != RCS) {
+		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
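
hsw_mm_switch() and gen7_mm_switch() emit the same six dwords: one MI_LOAD_REGISTER_IMM writing two register/value pairs, padded to even length with a NOOP. A sketch of the payload layout; the MI_* encodings follow the driver's MI_INSTR scheme, and the register arguments are stand-ins:

    #include <stdint.h>

    #define MI_INSTR(opcode, flags)	(((uint32_t)(opcode) << 23) | (flags))
    #define MI_NOOP			MI_INSTR(0, 0)
    #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2 * (x) - 1)

    static void emit_mm_switch(uint32_t *cs,
			       uint32_t dclv_reg, uint32_t dclv_val,
			       uint32_t base_reg, uint32_t base_val)
    {
	    cs[0] = MI_LOAD_REGISTER_IMM(2);
	    cs[1] = dclv_reg;	/* RING_PP_DIR_DCLV(ring) */
	    cs[2] = dclv_val;	/* PP_DIR_DCLV_2G */
	    cs[3] = base_reg;	/* RING_PP_DIR_BASE(ring) */
	    cs[4] = base_val;	/* get_pd_offset(ppgtt) */
	    cs[5] = MI_NOOP;	/* pad to the six dwords reserved */
    }
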
+static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			  struct intel_ring_buffer *ring,
+			  bool synchronous)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!synchronous)
+		return 0;
+
+	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+
+	POSTING_READ(RING_PP_DIR_DCLV(ring));
+
+	return 0;
+}
+
+static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i;
+	int j, ret;
 
-	BUG_ON(ppgtt->pd_offset & 0x3f);
+	for_each_ring(ring, dev_priv, j) {
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 
-	gen6_write_pdes(ppgtt);
+		/* We promise to do a switch later with FULL PPGTT. If this is
+		 * aliasing, this is the one and only switch we'll do */
+		if (USES_FULL_PPGTT(dev))
+			continue;
 
-	pd_offset = ppgtt->pd_offset;
-	pd_offset /= 64; /* in cachelines, */
-	pd_offset <<= 16;
+		ret = ppgtt->switch_mm(ppgtt, ring, true);
+		if (ret)
+			goto err_out;
+	}
 
-	if (INTEL_INFO(dev)->gen == 6) {
-		uint32_t ecochk, gab_ctl, ecobits;
+	return 0;
 
-		ecobits = I915_READ(GAC_ECO_BITS);
-		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
-					 ECOBITS_PPGTT_CACHE64B);
+err_out:
+	for_each_ring(ring, dev_priv, j)
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
+	return ret;
+}
 
-		gab_ctl = I915_READ(GAB_CTL);
-		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	uint32_t ecochk, ecobits;
+	int i;
 
-		ecochk = I915_READ(GAM_ECOCHK);
-		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
-				       ECOCHK_PPGTT_CACHE64B);
-		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	} else if (INTEL_INFO(dev)->gen >= 7) {
-		uint32_t ecochk, ecobits;
+	ecobits = I915_READ(GAC_ECO_BITS);
+	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
 
-		ecobits = I915_READ(GAC_ECO_BITS);
-		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
+	ecochk = I915_READ(GAM_ECOCHK);
+	if (IS_HASWELL(dev)) {
+		ecochk |= ECOCHK_PPGTT_WB_HSW;
+	} else {
+		ecochk |= ECOCHK_PPGTT_LLC_IVB;
+		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+	}
+	I915_WRITE(GAM_ECOCHK, ecochk);
 
-		ecochk = I915_READ(GAM_ECOCHK);
-		if (IS_HASWELL(dev)) {
-			ecochk |= ECOCHK_PPGTT_WB_HSW;
-		} else {
-			ecochk |= ECOCHK_PPGTT_LLC_IVB;
-			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
-		}
-		I915_WRITE(GAM_ECOCHK, ecochk);
+	for_each_ring(ring, dev_priv, i) {
+		int ret;
 		/* GFX_MODE is per-ring on gen7+ */
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+		/* We promise to do a switch later with FULL PPGTT. If this is
+		 * aliasing, this is the one and only switch we'll do */
+		if (USES_FULL_PPGTT(dev))
+			continue;
+
+		ret = ppgtt->switch_mm(ppgtt, ring, true);
+		if (ret)
+			return ret;
 	}
 
-	for_each_ring(ring, dev_priv, i) {
-		if (INTEL_INFO(dev)->gen >= 7)
-			I915_WRITE(RING_MODE_GEN7(ring),
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	return 0;
+}
 
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
+static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	uint32_t ecochk, gab_ctl, ecobits;
+	int i;
+
+	ecobits = I915_READ(GAC_ECO_BITS);
+	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
+		   ECOBITS_PPGTT_CACHE64B);
+
+	gab_ctl = I915_READ(GAB_CTL);
+	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+	ecochk = I915_READ(GAM_ECOCHK);
+	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+
+	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+	for_each_ring(ring, dev_priv, i) {
+		int ret = ppgtt->switch_mm(ppgtt, ring, true);
+		if (ret)
+			return ret;
 	}
+
 	return 0;
 }
 
@@ -608,7 +827,9 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
+	drm_mm_remove_node(&ppgtt->node);
 
 	if (ppgtt->pt_dma_addr) {
 		for (i = 0; i < ppgtt->num_pd_entries; i++)
@@ -626,20 +847,51 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 
 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
+#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
+#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned first_pd_entry_in_global_pt;
-	int i;
-	int ret = -ENOMEM;
+	bool retried = false;
+	int i, ret;
 
-	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
-	 * entries. For aliasing ppgtt support we just steal them at the end for
-	 * now. */
-	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
+	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
+	 * allocator works in address space sizes, so it's multiplied by page
+	 * size. We allocate at the top of the GTT to avoid fragmentation.
+	 */
+	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
+alloc:
+	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+						  &ppgtt->node, GEN6_PD_SIZE,
+						  GEN6_PD_ALIGN, 0,
+						  0, dev_priv->gtt.base.total,
+						  DRM_MM_SEARCH_DEFAULT);
+	if (ret == -ENOSPC && !retried) {
+		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
+					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
+					       I915_CACHE_NONE, false, true);
+		if (ret)
+			return ret;
+
+		retried = true;
+		goto alloc;
+	}
+
+	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
+		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
-	ppgtt->enable = gen6_ppgtt_enable;
+	if (IS_GEN6(dev)) {
+		ppgtt->enable = gen6_ppgtt_enable;
+		ppgtt->switch_mm = gen6_mm_switch;
+	} else if (IS_HASWELL(dev)) {
+		ppgtt->enable = gen7_ppgtt_enable;
+		ppgtt->switch_mm = hsw_mm_switch;
+	} else if (IS_GEN7(dev)) {
+		ppgtt->enable = gen7_ppgtt_enable;
+		ppgtt->switch_mm = gen7_mm_switch;
+	} else {
+		BUG();
+	}
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
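
The PD allocation above follows a common evict-and-retry shape: attempt the drm_mm insertion, and on -ENOSPC evict once before failing for good. The control flow in isolation, with stubbed-out allocator and evictor:

    #include <errno.h>
    #include <stdbool.h>

    static int try_insert_node(void) { return -ENOSPC; }	/* stub */
    static int evict_something(void) { return 0; }		/* stub */

    static int alloc_pd_space(void)
    {
	    bool retried = false;
	    int ret;

    alloc:
	    ret = try_insert_node();
	    if (ret == -ENOSPC && !retried) {
		    ret = evict_something();
		    if (ret)
			    return ret;	/* eviction itself failed */

		    retried = true;
		    goto alloc;		/* exactly one retry */
	    }
	    return ret;
    }
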
@@ -648,8 +900,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
 	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
 				  GFP_KERNEL);
-	if (!ppgtt->pt_pages)
+	if (!ppgtt->pt_pages) {
+		drm_mm_remove_node(&ppgtt->node);
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
@@ -678,8 +932,13 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->base.clear_range(&ppgtt->base, 0,
 				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
+	ppgtt->debug_dump = gen6_dump_ppgtt;
 
-	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
+	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
+			 ppgtt->node.size >> 20,
+			 ppgtt->node.start / PAGE_SIZE);
+	ppgtt->pd_offset =
+		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
 
 	return 0;
 
@@ -696,19 +955,15 @@ err_pt_alloc:
 			__free_page(ppgtt->pt_pages[i]);
 	}
 	kfree(ppgtt->pt_pages);
+	drm_mm_remove_node(&ppgtt->node);
 
 	return ret;
 }
 
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt;
-	int ret;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return -ENOMEM;
+	int ret = 0;
 
 	ppgtt->base.dev = dev;
 
@@ -719,45 +974,42 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	else
 		BUG();
 
-	if (ret)
-		kfree(ppgtt);
-	else {
-		dev_priv->mm.aliasing_ppgtt = ppgtt;
+	if (!ret) {
+		kref_init(&ppgtt->ref);
 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
 			    ppgtt->base.total);
+		i915_init_vm(dev_priv, &ppgtt->base);
+		if (INTEL_INFO(dev)->gen < 8) {
+			gen6_write_pdes(ppgtt);
+			DRM_DEBUG("Adding PPGTT at offset %x\n",
+				  ppgtt->pd_offset << 10);
+		}
 	}
 
 	return ret;
 }
 
-void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
+static void
+ppgtt_bind_vma(struct i915_vma *vma,
+	       enum i915_cache_level cache_level,
+	       u32 flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
-	if (!ppgtt)
-		return;
+	WARN_ON(flags);
 
-	ppgtt->base.cleanup(&ppgtt->base);
-	dev_priv->mm.aliasing_ppgtt = NULL;
+	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
-void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-			    struct drm_i915_gem_object *obj,
-			    enum i915_cache_level cache_level)
+static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
-				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				   cache_level);
-}
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
-void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
-			      struct drm_i915_gem_object *obj)
-{
-	ppgtt->base.clear_range(&ppgtt->base,
-				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				obj->base.size >> PAGE_SHIFT,
-				true);
+	vma->vm->clear_range(vma->vm,
+			     entry,
+			     vma->obj->base.size >> PAGE_SHIFT,
+			     true);
 }
 
 extern int intel_iommu_gfx_mapped;
@@ -849,6 +1101,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
 
 	i915_check_and_clear_faults(dev);
 
@@ -859,8 +1112,33 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 				       true);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
+							   &dev_priv->gtt.base);
+		if (!vma)
+			continue;
+
 		i915_gem_clflush_object(obj, obj->pin_display);
-		i915_gem_gtt_bind_object(obj, obj->cache_level);
+		/* The bind_vma code tries to be smart about tracking mappings.
+		 * Unfortunately, we have just wiped out the mappings above
+		 * without telling our object about it, so we need to fake it.
+		 */
+		obj->has_global_gtt_mapping = 0;
+		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
+	}
+
+	if (INTEL_INFO(dev)->gen >= 8)
+		return;
+
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		/* TODO: Perhaps it shouldn't be gen6 specific */
+		if (i915_is_ggtt(vm)) {
+			if (dev_priv->mm.aliasing_ppgtt)
+				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
+			continue;
+		}
+
+		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
 	}
 
 	i915_gem_chipset_flush(dev);
@@ -1017,16 +1295,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	readl(gtt_base);
 }
 
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct sg_table *st,
-				     unsigned int pg_start,
-				     enum i915_cache_level cache_level)
+static void i915_ggtt_bind_vma(struct i915_vma *vma,
+			       enum i915_cache_level cache_level,
+			       u32 unused)
 {
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
-	intel_gtt_insert_sg_entries(st, pg_start, flags);
-
+	BUG_ON(!i915_is_ggtt(vma->vm));
+	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
+	vma->obj->has_global_gtt_mapping = 1;
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -1037,33 +1317,77 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 	intel_gtt_clear_range(first_entry, num_entries);
 }
 
+static void i915_ggtt_unbind_vma(struct i915_vma *vma)
+{
+	const unsigned int first = vma->node.start >> PAGE_SHIFT;
+	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
+
+	BUG_ON(!i915_is_ggtt(vma->vm));
+	vma->obj->has_global_gtt_mapping = 0;
+	intel_gtt_clear_range(first, size);
+}
 
-void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
-			      enum i915_cache_level cache_level)
+static void ggtt_bind_vma(struct i915_vma *vma,
+			  enum i915_cache_level cache_level,
+			  u32 flags)
 {
-	struct drm_device *dev = obj->base.dev;
+	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
+	struct drm_i915_gem_object *obj = vma->obj;
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
-	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
-					  entry,
-					  cache_level);
+	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
+	 * or we have a global mapping already but the cacheability flags have
+	 * changed, set the global PTEs.
+	 *
+	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
+	 * instead if none of the above hold true.
+	 *
+	 * NB: A global mapping should only be needed for special regions like
+	 * "gtt mappable", SNB errata, or if specified via special execbuf
+	 * flags. At all other times, the GPU will use the aliasing PPGTT.
+	 */
+	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
+		if (!obj->has_global_gtt_mapping ||
+		    (cache_level != obj->cache_level)) {
+			vma->vm->insert_entries(vma->vm, obj->pages, entry,
+						cache_level);
+			obj->has_global_gtt_mapping = 1;
+		}
+	}
 
-	obj->has_global_gtt_mapping = 1;
+	if (dev_priv->mm.aliasing_ppgtt &&
+	    (!obj->has_aliasing_ppgtt_mapping ||
+	     (cache_level != obj->cache_level))) {
+		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
+		appgtt->base.insert_entries(&appgtt->base,
+					    vma->obj->pages, entry, cache_level);
+		vma->obj->has_aliasing_ppgtt_mapping = 1;
+	}
 }
 
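
The comment block in ggtt_bind_vma() encodes a two-part decision; reduced to a standalone predicate (GLOBAL_BIND value assumed):

    #include <stdbool.h>

    #define GLOBAL_BIND (1 << 0)	/* assumed flag value */

    /* Write global PTEs when there is no aliasing PPGTT to fall back on,
     * or when a global mapping was explicitly requested -- and then only
     * if the mapping is missing or its cacheability changed. */
    static bool needs_global_ptes(bool has_aliasing_ppgtt, unsigned int flags,
				  bool has_global_mapping, bool cache_changed)
    {
	    if (has_aliasing_ppgtt && !(flags & GLOBAL_BIND))
		    return false;	/* the aliasing PPGTT serves the GPU */

	    return !has_global_mapping || cache_changed;
    }
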
-void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
+static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct drm_device *dev = obj->base.dev;
+	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
-
-	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       entry,
-				       obj->base.size >> PAGE_SHIFT,
-				       true);
+	struct drm_i915_gem_object *obj = vma->obj;
+	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
+
+	if (obj->has_global_gtt_mapping) {
+		vma->vm->clear_range(vma->vm, entry,
+				     vma->obj->base.size >> PAGE_SHIFT,
+				     true);
+		obj->has_global_gtt_mapping = 0;
+	}
 
-	obj->has_global_gtt_mapping = 0;
+	if (obj->has_aliasing_ppgtt_mapping) {
+		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
+		appgtt->base.clear_range(&appgtt->base,
+					 entry,
+					 obj->base.size >> PAGE_SHIFT,
+					 true);
+		obj->has_aliasing_ppgtt_mapping = 0;
+	}
 }
 
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
@@ -1155,21 +1479,6 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
 }
 
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
-{
-	if (i915_enable_ppgtt >= 0)
-		return i915_enable_ppgtt;
-
-#ifdef CONFIG_INTEL_IOMMU
-	/* Disable ppgtt on SNB if VT-d is on. */
-	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
-		return false;
-#endif
-
-	return true;
-}
-
 void i915_gem_init_global_gtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1178,26 +1487,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	gtt_size = dev_priv->gtt.base.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
-	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
-		int ret;
-
-		if (INTEL_INFO(dev)->gen <= 7) {
-			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
-			 * aperture accordingly when using aliasing ppgtt. */
-			gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
-		}
-
-		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
-
-		ret = i915_gem_init_aliasing_ppgtt(dev);
-		if (!ret)
-			return;
-
-		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
-		drm_mm_takedown(&dev_priv->gtt.base.mm);
-		if (INTEL_INFO(dev)->gen < 8)
-			gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE;
-	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }
 
@@ -1438,7 +1727,6 @@ static int i915_gmch_probe(struct drm_device *dev,
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
-	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 
 	if (unlikely(dev_priv->gtt.do_idle_maps))
 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
@@ -1493,3 +1781,62 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 	return 0;
 }
+
+static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
+					      struct i915_address_space *vm)
+{
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (vma == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&vma->vma_link);
+	INIT_LIST_HEAD(&vma->mm_list);
+	INIT_LIST_HEAD(&vma->exec_list);
+	vma->vm = vm;
+	vma->obj = obj;
+
+	switch (INTEL_INFO(vm->dev)->gen) {
+	case 8:
+	case 7:
+	case 6:
+		if (i915_is_ggtt(vm)) {
+			vma->unbind_vma = ggtt_unbind_vma;
+			vma->bind_vma = ggtt_bind_vma;
+		} else {
+			vma->unbind_vma = ppgtt_unbind_vma;
+			vma->bind_vma = ppgtt_bind_vma;
+		}
+		break;
+	case 5:
+	case 4:
+	case 3:
+	case 2:
+		BUG_ON(!i915_is_ggtt(vm));
+		vma->unbind_vma = i915_ggtt_unbind_vma;
+		vma->bind_vma = i915_ggtt_bind_vma;
+		break;
+	default:
+		BUG();
+	}
+
+	/* Keep GGTT vmas first to make debug easier */
+	if (i915_is_ggtt(vm))
+		list_add(&vma->vma_link, &obj->vma_list);
+	else
+		list_add_tail(&vma->vma_link, &obj->vma_list);
+
+	return vma;
+}
+
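
__i915_gem_vma_create() effectively installs a two-entry vtable per VMA. The selection logic reduces to the following sketch, with stub functions standing in for the real bind/unbind helpers:

    #include <assert.h>
    #include <stdbool.h>

    typedef void (*vma_bind_fn)(void);

    static void ggtt_bind(void)      { }	/* stands in for ggtt_bind_vma */
    static void ppgtt_bind(void)     { }	/* stands in for ppgtt_bind_vma */
    static void i915_ggtt_bind(void) { }	/* stands in for i915_ggtt_bind_vma */

    static vma_bind_fn select_bind_vma(int gen, bool is_ggtt)
    {
	    if (gen >= 6)		/* gens 6-8: GGTT or PPGTT helpers */
		    return is_ggtt ? ggtt_bind : ppgtt_bind;

	    assert(is_ggtt);	/* BUG_ON: pre-gen6 has no PPGTT VMAs */
	    return i915_ggtt_bind;
    }
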
+struct i915_vma *
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+				  struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+
+	vma = i915_gem_obj_to_vma(obj, vm);
+	if (!vma)
+		vma = __i915_gem_vma_create(obj, vm);
+
+	return vma;
+}

+ 1 - 1
drivers/gpu/drm/i915/i915_gem_tiling.c

@@ -308,7 +308,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (obj->pin_count || obj->framebuffer_references) {
+	if (i915_gem_obj_is_pinned(obj) || obj->framebuffer_references) {
 		drm_gem_object_unreference_unlocked(&obj->base);
 		return -EBUSY;
 	}

+ 52 - 16
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -481,6 +481,7 @@ static void i915_error_state_free(struct kref *error_ref)
 static struct drm_i915_error_object *
 i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 			       struct drm_i915_gem_object *src,
+			       struct i915_address_space *vm,
 			       const int num_pages)
 {
 	struct drm_i915_error_object *dst;
@@ -494,7 +495,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 	if (dst == NULL)
 		return NULL;
 
-	reloc_offset = dst->gtt_offset = i915_gem_obj_ggtt_offset(src);
+	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
 	for (i = 0; i < num_pages; i++) {
 		unsigned long flags;
 		void *d;
@@ -505,7 +506,8 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
 
 		local_irq_save(flags);
 		if (reloc_offset < dev_priv->gtt.mappable_end &&
-		    src->has_global_gtt_mapping) {
+		    src->has_global_gtt_mapping &&
+		    i915_is_ggtt(vm)) {
 			void __iomem *s;
 
 			/* Simply ignore tiling or any overlapping fence.
@@ -555,8 +557,12 @@ unwind:
 	kfree(dst);
 	return NULL;
 }
-#define i915_error_object_create(dev_priv, src) \
-	i915_error_object_create_sized((dev_priv), (src), \
+#define i915_error_object_create(dev_priv, src, vm) \
+	i915_error_object_create_sized((dev_priv), (src), (vm), \
+				       (src)->base.size>>PAGE_SHIFT)
+
+#define i915_error_ggtt_object_create(dev_priv, src) \
+	i915_error_object_create_sized((dev_priv), (src), &(dev_priv)->gtt.base, \
 				       (src)->base.size>>PAGE_SHIFT)
 
 static void capture_bo(struct drm_i915_error_buffer *err,
@@ -571,7 +577,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->write_domain = obj->base.write_domain;
 	err->fence_reg = obj->fence_reg;
 	err->pinned = 0;
-	if (obj->pin_count > 0)
+	if (i915_gem_obj_is_pinned(obj))
 		err->pinned = 1;
 	if (obj->user_pin_count > 0)
 		err->pinned = -1;
@@ -604,7 +610,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 	int i = 0;
 
 	list_for_each_entry(obj, head, global_list) {
-		if (obj->pin_count == 0)
+		if (!i915_gem_obj_is_pinned(obj))
 			continue;
 
 		capture_bo(err++, obj);
@@ -648,6 +654,32 @@ static void i915_gem_record_fences(struct drm_device *dev,
 	}
 }
 
+/* This assumes all batchbuffers are executed from the PPGTT. It might have to
+ * change in the future. */
+static bool is_active_vm(struct i915_address_space *vm,
+			 struct intel_ring_buffer *ring)
+{
+	struct drm_device *dev = vm->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt;
+
+	if (INTEL_INFO(dev)->gen < 7)
+		return i915_is_ggtt(vm);
+
+	/* FIXME: This ignores that the global gtt vm is also on this list. */
+	ppgtt = container_of(vm, struct i915_hw_ppgtt, base);
+
+	if (INTEL_INFO(dev)->gen >= 8) {
+		u64 pdp0 = (u64)I915_READ(GEN8_RING_PDP_UDW(ring, 0)) << 32;
+		pdp0 |=  I915_READ(GEN8_RING_PDP_LDW(ring, 0));
+		return pdp0 == ppgtt->pd_dma_addr[0];
+	} else {
+		u32 pp_db;
+		pp_db = I915_READ(RING_PP_DIR_BASE(ring));
+		return (pp_db >> 10) == ppgtt->pd_offset;
+	}
+}
+
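
is_active_vm() reassembles the 64-bit PDP0 address from two 32-bit register reads, the inverse of the split in gen8_write_pdp(). Round-tripped standalone:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t join_pdp(uint32_t udw, uint32_t ldw)
    {
	    return ((uint64_t)udw << 32) | ldw;
    }

    int main(void)
    {
	    uint64_t addr = 0x1234abcd5000ull;	/* arbitrary PD address */

	    assert(join_pdp(addr >> 32, (uint32_t)addr) == addr);
	    return 0;
    }
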
 static struct drm_i915_error_object *
 i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 			     struct intel_ring_buffer *ring)
@@ -655,6 +687,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	struct i915_address_space *vm;
 	struct i915_vma *vma;
 	struct drm_i915_gem_object *obj;
+	bool found_active = false;
 	u32 seqno;
 
 	if (!ring->get_seqno)
@@ -669,11 +702,16 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 		obj = ring->scratch.obj;
 		if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
 		    acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
-			return i915_error_object_create(dev_priv, obj);
+			return i915_error_ggtt_object_create(dev_priv, obj);
 	}
 
 	seqno = ring->get_seqno(ring, false);
 	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		if (!is_active_vm(vm, ring))
+			continue;
+
+		found_active = true;
+
 		list_for_each_entry(vma, &vm->active_list, mm_list) {
 			obj = vma->obj;
 			if (obj->ring != ring)
@@ -688,10 +726,11 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 			/* We need to copy these to an anonymous buffer as the simplest
 			 * method to avoid being overwritten by userspace.
 			 */
-			return i915_error_object_create(dev_priv, obj);
+			return i915_error_object_create(dev_priv, obj, vm);
 		}
 	}
 
+	WARN_ON(!found_active);
 	return NULL;
 }
 
@@ -765,7 +804,9 @@ static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
 			ering->ctx = i915_error_object_create_sized(dev_priv,
-								    obj, 1);
+								    obj,
+								    &dev_priv->gtt.base,
+								    1);
 			break;
 		}
 	}
@@ -786,7 +827,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			i915_error_first_batchbuffer(dev_priv, ring);
 
 		error->ring[i].ringbuffer =
-			i915_error_object_create(dev_priv, ring->obj);
+			i915_error_ggtt_object_create(dev_priv, ring->obj);
 
 
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
@@ -834,7 +875,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 		i++;
 	error->active_bo_count[ndx] = i;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (obj->pin_count)
+		if (i915_gem_obj_is_pinned(obj))
 			i++;
 	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
@@ -868,11 +909,6 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
 		cnt++;
 
-	if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
-		cnt = 1;
-
-	vm = &dev_priv->gtt.base;
-
 	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
 	error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
 	error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),

+ 2 - 2
drivers/gpu/drm/i915/intel_fbdev.c

@@ -104,7 +104,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	return 0;
 
 out_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 out_unref:
 	drm_gem_object_unreference(&obj->base);
 out:
@@ -208,7 +208,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	return 0;
 
 out_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);

+ 4 - 4
drivers/gpu/drm/i915/intel_overlay.c

@@ -293,7 +293,7 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
 	overlay->old_vid_bo = NULL;
@@ -306,7 +306,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
 	/* never have the overlay hw on without showing a frame */
 	BUG_ON(!overlay->vid_bo);
 
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 	overlay->vid_bo = NULL;
 
@@ -782,7 +782,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	return 0;
 
 out_unpin:
-	i915_gem_object_unpin(new_bo);
+	i915_gem_object_ggtt_unpin(new_bo);
 	return ret;
 }
 
@@ -1386,7 +1386,7 @@ void intel_setup_overlay(struct drm_device *dev)
 
 out_unpin_bo:
 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
-		i915_gem_object_unpin(reg_bo);
+		i915_gem_object_ggtt_unpin(reg_bo);
 out_free_bo:
 	drm_gem_object_unreference(&reg_bo->base);
 out_free:

+ 3 - 3
drivers/gpu/drm/i915/intel_pm.c

@@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
 	return ctx;
 
 err_unpin:
-	i915_gem_object_unpin(ctx);
+	i915_gem_object_ggtt_unpin(ctx);
 err_unref:
 	drm_gem_object_unreference(&ctx->base);
 	return NULL;
@@ -3625,13 +3625,13 @@ void ironlake_teardown_rc6(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->ips.renderctx) {
-		i915_gem_object_unpin(dev_priv->ips.renderctx);
+		i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
 		drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
 		dev_priv->ips.renderctx = NULL;
 	}
 
 	if (dev_priv->ips.pwrctx) {
-		i915_gem_object_unpin(dev_priv->ips.pwrctx);
+		i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
 		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
 		dev_priv->ips.pwrctx = NULL;
 	}

+ 6 - 6
drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -549,7 +549,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
 	return 0;
 
 err_unpin:
-	i915_gem_object_unpin(ring->scratch.obj);
+	i915_gem_object_ggtt_unpin(ring->scratch.obj);
 err_unref:
 	drm_gem_object_unreference(&ring->scratch.obj->base);
 err:
@@ -625,7 +625,7 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 
 	if (INTEL_INFO(dev)->gen >= 5) {
 		kunmap(sg_page(ring->scratch.obj->pages->sgl));
-		i915_gem_object_unpin(ring->scratch.obj);
+		i915_gem_object_ggtt_unpin(ring->scratch.obj);
 	}
 
 	drm_gem_object_unreference(&ring->scratch.obj->base);
@@ -1253,7 +1253,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
 		return;
 
 	kunmap(sg_page(obj->pages->sgl));
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 	ring->status_page.obj = NULL;
 }
@@ -1293,7 +1293,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 	return 0;
 
 err_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 err_unref:
 	drm_gem_object_unreference(&obj->base);
 err:
@@ -1390,7 +1390,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 err_unmap:
 	iounmap(ring->virtual_start);
 err_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_ggtt_unpin(obj);
 err_unref:
 	drm_gem_object_unreference(&obj->base);
 	ring->obj = NULL;
@@ -1418,7 +1418,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 
 	iounmap(ring->virtual_start);
 
-	i915_gem_object_unpin(ring->obj);
+	i915_gem_object_ggtt_unpin(ring->obj);
 	drm_gem_object_unreference(&ring->obj->base);
 	ring->obj = NULL;
 	ring->preallocated_lazy_request = NULL;

+ 5 - 3
drivers/gpu/drm/i915/intel_uncore.c

@@ -852,6 +852,7 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_reset_stats *args = data;
 	struct i915_ctx_hang_stats *hs;
+	struct i915_hw_context *ctx;
 	int ret;
 
 	if (args->flags || args->pad)
@@ -864,11 +865,12 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id);
-	if (IS_ERR(hs)) {
+	ctx = i915_gem_context_get(file->driver_priv, args->ctx_id);
+	if (IS_ERR(ctx)) {
 		mutex_unlock(&dev->struct_mutex);
-		return PTR_ERR(hs);
+		return PTR_ERR(ctx);
 	}
+	hs = &ctx->hang_stats;
 
 	if (capable(CAP_SYS_ADMIN))
 		args->reset_count = i915_reset_count(&dev_priv->gpu_error);