@@ -211,6 +211,7 @@ enum {
 	FAULT_AND_CONTINUE /* Unsupported */
 };
 #define GEN8_CTX_ID_SHIFT 32
+#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
 
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx);
@@ -1077,6 +1078,191 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 	return 0;
 }
 
+#define wa_ctx_emit(batch, cmd) \
+	do { \
+		if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
+			return -ENOSPC; \
+		} \
+		batch[index++] = (cmd); \
+	} while (0)
+
+static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
+				    uint32_t offset,
+				    uint32_t start_alignment)
+{
+	return wa_ctx->offset = ALIGN(offset, start_alignment);
+}
+
+static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
+			     uint32_t offset,
+			     uint32_t size_alignment)
+{
+	wa_ctx->size = offset - wa_ctx->offset;
+
+	WARN(wa_ctx->size % size_alignment,
+	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
+	     wa_ctx->size, size_alignment);
+	return 0;
+}
+
+/**
+ * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
+ *
+ * @ring: only applicable for RCS
+ * @wa_ctx: structure representing wa_ctx
+ *  offset: specifies the start of the batch, should be cache-aligned. This is
+ *    updated with the offset value received as input.
+ *  size: size of the batch in DWORDS, although HW expects it in terms of cachelines
+ * @batch: page in which WA are loaded
+ * @offset: This field specifies the start of the batch; it should be
+ *  cache-aligned, otherwise it is adjusted accordingly.
+ *  Typically we only have one indirect_ctx and one per_ctx batch buffer which are
+ *  initialized at the beginning and shared across all contexts, but this field
+ *  helps us to have multiple batches at different offsets and select them based
+ *  on certain criteria. At the moment this batch always starts at the beginning
+ *  of the page and we don't have multiple wa_ctx batch buffers yet.
+ *
+ *  The number of WA applied is not known at the beginning; we use this field
+ *  to return the number of DWORDS written.
+ *
+ *  Note that this batch does not contain MI_BATCH_BUFFER_END,
+ *  so it is padded with MI_NOOPs to make it cacheline aligned.
+ *  MI_BATCH_BUFFER_END is added to the per-ctx batch, and the two together
+ *  make a complete batch buffer.
+ *
+ * Return: non-zero if we exceed the PAGE_SIZE limit.
+ */
+
+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
+				    struct i915_wa_ctx_bb *wa_ctx,
+				    uint32_t *const batch,
+				    uint32_t *offset)
+{
+	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
+
+	/* FIXME: Replace me with WA */
+	wa_ctx_emit(batch, MI_NOOP);
+
+	/* Pad to end of cacheline */
+	while (index % CACHELINE_DWORDS)
+		wa_ctx_emit(batch, MI_NOOP);
+
+	/*
+	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+	 * execution depends on the length specified in terms of cache lines
+	 * in the register CTX_RCS_INDIRECT_CTX
+	 */
+
+	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+}
+
+/**
+ * gen8_init_perctx_bb() - initialize per ctx batch with WA
+ *
+ * @ring: only applicable for RCS
+ * @wa_ctx: structure representing wa_ctx
+ *  offset: specifies the start of the batch, should be cache-aligned.
+ *  size: size of the batch in DWORDS, although HW expects it in terms of cachelines
+ * @offset: This field specifies the start of this batch.
+ *  This batch starts immediately after the indirect_ctx batch. Since we ensure
+ *  that indirect_ctx ends on a cacheline, this batch is aligned automatically.
+ *
+ *  The number of DWORDS written is returned using this field.
+ *
+ * This batch is terminated with MI_BATCH_BUFFER_END, so we need not add padding
+ * to align it to a cacheline; padding after MI_BATCH_BUFFER_END is redundant.
+ */
+static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
+			       struct i915_wa_ctx_bb *wa_ctx,
+			       uint32_t *const batch,
+			       uint32_t *offset)
+{
+	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
+
+	wa_ctx_emit(batch, MI_BATCH_BUFFER_END);
+
+	return wa_ctx_end(wa_ctx, *offset = index, 1);
+}
+
+static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
+{
+	int ret;
+
+	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
+	if (!ring->wa_ctx.obj) {
+		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
+		return -ENOMEM;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
+				 ret);
+		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
+{
+	if (ring->wa_ctx.obj) {
+		i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
+		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+		ring->wa_ctx.obj = NULL;
+	}
+}
+
+static int intel_init_workaround_bb(struct intel_engine_cs *ring)
+{
+	int ret;
+	uint32_t *batch;
+	uint32_t offset;
+	struct page *page;
+	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
+
+	WARN_ON(ring->id != RCS);
+
+	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+		return ret;
+	}
+
+	page = i915_gem_object_get_page(wa_ctx->obj, 0);
+	batch = kmap_atomic(page);
+	offset = 0;
+
+	if (INTEL_INFO(ring->dev)->gen == 8) {
+		ret = gen8_init_indirectctx_bb(ring,
+					       &wa_ctx->indirect_ctx,
+					       batch,
+					       &offset);
+		if (ret)
+			goto out;
+
+		ret = gen8_init_perctx_bb(ring,
+					  &wa_ctx->per_ctx,
+					  batch,
+					  &offset);
+		if (ret)
+			goto out;
+	} else {
+		WARN(INTEL_INFO(ring->dev)->gen >= 8,
+		     "WA batch buffer is not initialized for Gen%d\n",
+		     INTEL_INFO(ring->dev)->gen);
+		lrc_destroy_wa_ctx_obj(ring);
+	}
+
+out:
+	kunmap_atomic(batch);
+	if (ret)
+		lrc_destroy_wa_ctx_obj(ring);
+
+	return ret;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -1411,6 +1597,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 		kunmap(sg_page(ring->status_page.obj->pages->sgl));
 		ring->status_page.obj = NULL;
 	}
+
+	lrc_destroy_wa_ctx_obj(ring);
 }
 
 static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
@@ -1474,7 +1662,22 @@ static int logical_render_ring_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	return intel_init_pipe_control(ring);
+	ret = intel_init_workaround_bb(ring);
+	if (ret) {
+		/*
+		 * We continue even if we fail to initialize the WA batch
+		 * because we only expect rare glitches, nothing critical
+		 * enough to prevent us from using the GPU.
+		 */
+		DRM_ERROR("WA batch buffer initialization failed: %d\n",
+			  ret);
+	}
+
+	ret = intel_init_pipe_control(ring);
+	if (ret)
+		lrc_destroy_wa_ctx_obj(ring);
+
+	return ret;
 }
 
 static int logical_bsd_ring_init(struct drm_device *dev)
@@ -1754,15 +1957,27 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
 	reg_state[CTX_SECOND_BB_STATE+1] = 0;
 	if (ring->id == RCS) {
-		/* TODO: according to BSpec, the register state context
-		 * for CHV does not have these. OTOH, these registers do
-		 * exist in CHV. I'm waiting for a clarification */
 		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
 		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
 		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+		if (ring->wa_ctx.obj) {
+			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
+			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
+
+			reg_state[CTX_RCS_INDIRECT_CTX+1] =
+				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
+				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
+
+			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
+				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+
+			reg_state[CTX_BB_PER_CTX_PTR+1] =
+				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
+				0x01;
+		}
 	}
 	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
 	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;