@@ -3657,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout, it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct i915_vma *vma, *next;
+	bool bound = false;
 	int ret = 0;
 
 	if (obj->cache_level == cache_level)
 		goto out;
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		DRM_DEBUG("can not change the cache level of pinned objects\n");
-		return -EBUSY;
-	}
-
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (vma->pin_count) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
-		}
+		} else
+			bound = true;
 	}
 
-	if (i915_gem_obj_bound_any(obj)) {
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However, since
+	 * we already have a valid slot, dma mapping, pages etc, we may as well
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (bound) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
-		i915_gem_object_finish_gtt(obj);
-
-		/* Before SandyBridge, you could not use tiling or fence
-		 * registers with snooped memory, so relinquish any fences
-		 * currently pointing to our region in the aperture.
-		 */
-		if (INTEL_INFO(dev)->gen < 6) {
+		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmapping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
 			ret = i915_gem_object_put_fence(obj);
 			if (ret)
 				return ret;
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access, or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			if (drm_mm_node_allocated(&vma->node)) {
-				ret = i915_vma_bind(vma, cache_level,
-						    PIN_UPDATE);
-				if (ret)
-					return ret;
-			}
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3711,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	obj->cache_level = cache_level;
 
 out:
+	/* Flush the dirty CPU caches to the backing storage so that the
+	 * object is now coherent at its new cache level (with respect
+	 * to the access domain).
+	 */
 	if (obj->cache_dirty &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
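
For reference, the cache level applied by this function is the one userspace requests through the set-caching ioctl (DRM_IOCTL_I915_GEM_SET_CACHING): i915_gem_set_caching_ioctl() translates the I915_CACHING_* value into an i915_cache_level and then calls i915_gem_object_set_cache_level(). The sketch below illustrates only that userspace side and is not part of the patch; the device node, the 4 KiB object size and the choice of I915_CACHING_NONE are assumptions made purely for the example.

/*
 * Minimal userspace sketch (illustration only): create a GEM buffer object
 * and ask the kernel to move it to the uncached level, which lands in
 * i915_gem_object_set_cache_level() via the set-caching ioctl.
 *
 * Build against the libdrm uapi headers, e.g.:
 *   gcc set-caching-sketch.c $(pkg-config --cflags libdrm)
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include <i915_drm.h>	/* DRM_IOCTL_I915_GEM_*, struct drm_i915_gem_caching */

int main(void)
{
	/* Assumed i915 render node; adjust for the machine at hand. */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Create a 4 KiB GEM buffer object to operate on. */
	struct drm_i915_gem_create create;
	memset(&create, 0, sizeof(create));
	create.size = 4096;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create)) {
		perror("DRM_IOCTL_I915_GEM_CREATE");
		return 1;
	}

	/* Request the uncached cache level; the kernel then rewrites or
	 * rebinds the PTEs of every VMA of this object, as in the patch. */
	struct drm_i915_gem_caching caching;
	memset(&caching, 0, sizeof(caching));
	caching.handle = create.handle;
	caching.caching = I915_CACHING_NONE;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &caching))
		perror("DRM_IOCTL_I915_GEM_SET_CACHING");
	else
		printf("GEM handle %u is now uncached\n", create.handle);

	/* Release the handle and close the device. */
	struct drm_gem_close gem_close;
	memset(&gem_close, 0, sizeof(gem_close));
	gem_close.handle = create.handle;
	ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
	close(fd);
	return 0;
}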
|