@@ -3657,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout, it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct i915_vma *vma, *next;
+	bool bound = false;
 	int ret = 0;
 
 	if (obj->cache_level == cache_level)
 		goto out;
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		DRM_DEBUG("can not change the cache level of pinned objects\n");
-		return -EBUSY;
-	}
-
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (vma->pin_count) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
-		}
+		} else
+			bound = true;
 	}
 
-	if (i915_gem_obj_bound_any(obj)) {
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However, since
+	 * we already have a valid slot, dma mapping, pages etc, we may as well
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (bound) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
-		i915_gem_object_finish_gtt(obj);
-
-		/* Before SandyBridge, you could not use tiling or fence
-		 * registers with snooped memory, so relinquish any fences
-		 * currently pointing to our region in the aperture.
-		 */
-		if (INTEL_INFO(dev)->gen < 6) {
+		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmapping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
 			ret = i915_gem_object_put_fence(obj);
 			if (ret)
 				return ret;
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access, or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			if (drm_mm_node_allocated(&vma->node)) {
-				ret = i915_vma_bind(vma, cache_level,
-						    PIN_UPDATE);
-				if (ret)
-					return ret;
-			}
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3711,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	obj->cache_level = cache_level;
 
 out:
+	/* Flush the dirty CPU caches to the backing storage so that the
+	 * object is now coherent at its new cache level (with respect
+	 * to the access domain).
+	 */
 	if (obj->cache_dirty &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
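
For reference, the cache level applied by this function is the one userspace requests through the set-caching ioctl (DRM_IOCTL_I915_GEM_SET_CACHING): i915_gem_set_caching_ioctl() translates the I915_CACHING_* value into an i915_cache_level and then calls i915_gem_object_set_cache_level(). The sketch below illustrates only that userspace side and is not part of the patch; the device node, the 4 KiB object size and the choice of I915_CACHING_NONE are assumptions made purely for the example.

/*
 * Minimal userspace sketch (illustration only): create a GEM buffer object
 * and ask the kernel to move it to the uncached level, which lands in
 * i915_gem_object_set_cache_level() via the set-caching ioctl.
 *
 * Build against the libdrm uapi headers, e.g.:
 *   gcc set-caching-sketch.c $(pkg-config --cflags libdrm)
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include <i915_drm.h>	/* DRM_IOCTL_I915_GEM_*, struct drm_i915_gem_caching */

int main(void)
{
	/* Assumed i915 render node; adjust for the machine at hand. */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Create a 4 KiB GEM buffer object to operate on. */
	struct drm_i915_gem_create create;
	memset(&create, 0, sizeof(create));
	create.size = 4096;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create)) {
		perror("DRM_IOCTL_I915_GEM_CREATE");
		return 1;
	}

	/* Request the uncached cache level; the kernel then rewrites or
	 * rebinds the PTEs of every VMA of this object, as in the patch. */
	struct drm_i915_gem_caching caching;
	memset(&caching, 0, sizeof(caching));
	caching.handle = create.handle;
	caching.caching = I915_CACHING_NONE;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &caching))
		perror("DRM_IOCTL_I915_GEM_SET_CACHING");
	else
		printf("GEM handle %u is now uncached\n", create.handle);

	/* Release the handle and close the device. */
	struct drm_gem_close gem_close;
	memset(&gem_close, 0, sizeof(gem_close));
	gem_close.handle = create.handle;
	ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
	close(fd);
	return 0;
}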
|