@@ -208,9 +208,9 @@
 
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+#define PREEMPT_ID 0x1
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
					    struct intel_engine_cs *engine);
@@ -429,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+	writel(upper_32_bits(desc), elsp);
+	writel(lower_32_bits(desc), elsp);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlists.port;
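
The new elsp_write() helper captures the ELSP submission protocol in one place: each 64-bit context descriptor is handed to the hardware as two 32-bit register writes, upper dword first, lower dword second. A minimal userspace sketch of that split (the fake_* names and the array standing in for the MMIO register are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_elsp[2];			/* stand-in for the ELSP register */
static unsigned int fake_elsp_writes;

static void fake_writel(uint32_t val, uint32_t *reg)
{
	reg[fake_elsp_writes++ & 1] = val;	/* record writes in arrival order */
}

static void elsp_write_model(uint64_t desc, uint32_t *elsp)
{
	fake_writel((uint32_t)(desc >> 32), elsp);	/* upper_32_bits(desc) */
	fake_writel((uint32_t)desc, elsp);		/* lower_32_bits(desc) */
}

int main(void)
{
	elsp_write_model(0x0123456789abcdefULL, fake_elsp);
	printf("upper=%08x lower=%08x\n",
	       (unsigned int)fake_elsp[0], (unsigned int)fake_elsp[1]);
	return 0;
}
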
@@ -454,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = 0;
 		}
 
-		writel(upper_32_bits(desc), elsp);
-		writel(lower_32_bits(desc), elsp);
+		elsp_write(desc, elsp);
 	}
 }
 
@@ -488,26 +493,43 @@ static void port_assign(struct execlist_port *port,
 	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
 }
 
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce =
+		&engine->i915->preempt_context->engine[engine->id];
+	u32 __iomem *elsp =
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
+
+	GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+	GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
+
+	memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
+	ce->ring->tail += WA_TAIL_BYTES;
+	ce->ring->tail &= (ce->ring->size - 1);
+	ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
+	for (n = execlists_num_ports(&engine->execlists); --n; )
+		elsp_write(0, elsp);
+
+	elsp_write(ce->lrc_desc, elsp);
+}
+
+static bool can_preempt(struct intel_engine_cs *engine)
+{
+	return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	const struct execlist_port * const last_port =
 		&execlists->port[execlists->port_mask];
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
-		/* WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-		 * as we resubmit the request. See gen8_emit_breadcrumb()
-		 * for where we prepare the padding after the end of the
-		 * request.
-		 */
-		last->tail = last->wa_tail;
-
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
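
inject_preempt_context() appends WA_TAIL_BYTES of zeroed (MI_NOOP) padding to the preempt context's ring, advances and wraps the recorded tail, writes a null descriptor into every ELSP slot but one, and finally submits the preempt context on its own. A small standalone sketch of the tail arithmetic (the constants mirror the defines above; the other names are made up for illustration, and the mask-based wrap relies on the ring size being a power of two):

#include <assert.h>
#include <stdio.h>

#define WA_TAIL_DWORDS	2
#define WA_TAIL_BYTES	(sizeof(unsigned int) * WA_TAIL_DWORDS)

/* Advance a ring tail past the workaround padding and wrap it. */
static unsigned int advance_wa_tail(unsigned int tail, unsigned int size)
{
	assert((size & (size - 1)) == 0);	/* wrap-by-mask needs a power-of-two size */
	tail += WA_TAIL_BYTES;
	tail &= size - 1;			/* wrap back to the start of the ring */
	return tail;
}

int main(void)
{
	printf("%u\n", advance_wa_tail(4096 - WA_TAIL_BYTES, 4096));	/* prints 0 */
	printf("%u\n", advance_wa_tail(128, 4096));			/* prints 136 */
	return 0;
}
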
@@ -532,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+	if (!rb)
+		goto unlock;
+
+	if (last) {
+		/*
+		 * Don't resubmit or switch until all outstanding
+		 * preemptions (lite-restore) are seen. Then we
+		 * know the next preemption status we see corresponds
+		 * to this ELSP update.
+		 */
+		if (port_count(&port[0]) > 1)
+			goto unlock;
+
+		if (can_preempt(engine) &&
+		    rb_entry(rb, struct i915_priolist, node)->priority >
+		    max(last->priotree.priority, 0)) {
+			/*
+			 * Switch to our empty preempt context so
+			 * the state of the GPU is known (idle).
+			 */
+			inject_preempt_context(engine);
+			execlists->preempt = true;
+			goto unlock;
+		} else {
+			/*
+			 * In theory, we could coalesce more requests onto
+			 * the second port (the first port is active, with
+			 * no preemptions pending). However, that means we
+			 * then have to deal with the possible lite-restore
+			 * of the second port (as we submit the ELSP, there
+			 * may be a context-switch) but also we may complete
+			 * the resubmission before the context-switch. Ergo,
+			 * coalescing onto the second port will cause a
+			 * preemption event, but we cannot predict whether
+			 * that will affect port[0] or port[1].
+			 *
+			 * If the second port is already active, we can wait
+			 * until the next context-switch before contemplating
+			 * new requests. The GPU will be busy and we should be
+			 * able to resubmit the new ELSP before it idles,
+			 * avoiding pipeline bubbles (momentary pauses where
+			 * the driver is unable to keep up the supply of new
+			 * work).
+			 */
+			if (port_count(&port[1]))
+				goto unlock;
+
+			/* WaIdleLiteRestore:bdw,skl
+			 * Apply the wa NOOPs to prevent
+			 * ring:HEAD == req:TAIL as we resubmit the
+			 * request. See gen8_emit_breadcrumb() for
+			 * where we prepare the padding after the
+			 * end of the request.
+			 */
+			last->tail = last->wa_tail;
+		}
+	}
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
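
The preemption decision above reduces to one predicate: preempt only when the platform supports it and the highest queued priority strictly exceeds both the priority of the last request in flight and the default priority (0), so default-priority work never preempts other default-priority work. A sketch with plain integers standing in for the priolist/priotree priorities (the function names here are illustrative, not the driver's):

#include <stdbool.h>
#include <stdio.h>

static int max_int(int a, int b)
{
	return a > b ? a : b;
}

/* queued_prio: priority of the first i915_priolist in the execlists queue;
 * running_prio: priority of the last request submitted to the ELSP. */
static bool should_preempt(bool can_preempt, int queued_prio, int running_prio)
{
	return can_preempt && queued_prio > max_int(running_prio, 0);
}

int main(void)
{
	printf("%d\n", should_preempt(true, 1, 0));	/* 1: higher priority wins */
	printf("%d\n", should_preempt(true, 0, 0));	/* 0: equal priority never preempts */
	printf("%d\n", should_preempt(true, 0, -1));	/* 0: default never preempts low */
	return 0;
}
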
@@ -595,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
+unlock:
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
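
Turning the queue walk from while (rb) into an up-front emptiness test plus a do/while leaves the iteration itself unchanged, but opens a spot between "is anything queued?" and "start coalescing" where the preemption logic above can run and bail out through the new unlock label. A minimal illustration (names invented here) that the two loop shapes visit the same elements:

#include <assert.h>

static int sum_while(const int *p, int n)
{
	int sum = 0;

	while (n--)
		sum += *p++;
	return sum;
}

static int sum_do_while(const int *p, int n)
{
	int sum = 0;

	if (!n)
		goto out;

	/* ...decision point: inspect the head of the queue before consuming... */
	do {
		sum += *p++;
	} while (--n);
out:
	return sum;
}

int main(void)
{
	const int v[] = { 1, 2, 3 };

	assert(sum_while(v, 3) == sum_do_while(v, 3));
	assert(sum_while(v, 0) == sum_do_while(v, 0));
	return 0;
}
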
@@ -680,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
-{
-	const struct execlist_port *port = engine->execlists.port;
-
-	return port_count(&port[0]) + port_count(&port[1]) < 2;
-}
-
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -695,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
+	struct execlist_port * const port = execlists->port;
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -780,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+			    buf[2*head + 1] == PREEMPT_ID) {
+				execlist_cancel_port_requests(execlists);
+
+				spin_lock_irq(&engine->timeline->lock);
+				unwind_incomplete_requests(engine);
+				spin_unlock_irq(&engine->timeline->lock);
+
+				GEM_BUG_ON(!execlists->preempt);
+				execlists->preempt = false;
+				continue;
+			}
+
+			if (status & GEN8_CTX_STATUS_PREEMPTED &&
+			    execlists->preempt)
+				continue;
+
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
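
In the CSB scan, completion of an injected preemption is recognised by an element reporting ACTIVE_IDLE for the reserved PREEMPT_ID context: the ports are cancelled, incomplete requests are unwound back onto the priority queue, and execlists->preempt is cleared; PREEMPTED events seen while the preemption is in flight are simply skipped. A simplified model of that classification (structure layout, bit values and helper names are placeholders, not the real CSB encoding):

#include <stdbool.h>
#include <stdio.h>

#define FAKE_STATUS_PREEMPTED	(1u << 0)
#define FAKE_STATUS_ACTIVE_IDLE	(1u << 3)
#define FAKE_PREEMPT_ID		0x1u

struct fake_csb {
	unsigned int status;
	unsigned int context_id;
};

/* True when this element marks the end of the preemption we injected. */
static bool preempt_complete(const struct fake_csb *csb)
{
	return (csb->status & FAKE_STATUS_ACTIVE_IDLE) &&
	       csb->context_id == FAKE_PREEMPT_ID;
}

/* True when the element is a side effect of that preemption and should be
 * ignored by the normal completion path. */
static bool preempt_noise(const struct fake_csb *csb, bool preempt_in_flight)
{
	return (csb->status & FAKE_STATUS_PREEMPTED) && preempt_in_flight;
}

int main(void)
{
	struct fake_csb done = { FAKE_STATUS_ACTIVE_IDLE, FAKE_PREEMPT_ID };
	struct fake_csb kicked = { FAKE_STATUS_PREEMPTED, 0x5 };

	printf("%d %d\n", preempt_complete(&done), preempt_noise(&kicked, true));
	return 0;
}
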
@@ -811,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 	}
 
-	if (execlists_elsp_ready(engine))
+	if (!execlists->preempt)
 		execlists_dequeue(engine);
 
 	intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -824,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&engine->execlists.irq_tasklet);
 }
 
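
With execlists_elsp_ready() gone, insert_request() kicks the tasklet purely on the boolean that lookup_priolist() returns packed into the low bit of the priolist pointer (set when the request lands at the head of the queue); whether the ELSP can actually take more work is now decided inside the tasklet. A self-contained sketch of that pointer-tagging idiom (the helpers are re-implemented here with a plain mask rather than the driver's ptr_*_bits() macros):

#include <stdint.h>
#include <stdio.h>

/* Pack a one-bit flag into the low bit of a sufficiently aligned pointer. */
static void *pack_low_bit(void *ptr, unsigned long flag)
{
	return (void *)((uintptr_t)ptr | (flag & 1));
}

static unsigned long unpack_low_bit(void *packed)
{
	return (uintptr_t)packed & 1;
}

static void *strip_low_bit(void *packed)
{
	return (void *)((uintptr_t)packed & ~(uintptr_t)1);
}

int main(void)
{
	static int priolist;	/* stand-in for a struct with alignment > 1 byte */
	void *packed = pack_low_bit(&priolist, 1);

	printf("first=%lu same=%d\n", unpack_low_bit(packed),
	       strip_low_bit(packed) == (void *)&priolist);
	return 0;
}
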
@@ -954,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);
-
-	/* XXX Do we need to preempt to make room for us and our deps? */
 }
 
 static struct intel_ring *
@@ -1151,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
				       i915_ggtt_offset(engine->scratch) +
				       2 * CACHELINE_BYTES);
 
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
 		*batch++ = MI_NOOP;
@@ -1166,6 +1257,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 
 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
@@ -1211,6 +1304,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 		*batch++ = 0;
 	}
 
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
 		*batch++ = MI_NOOP;
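
The MI_ARB_ON_OFF additions bracket the indirect-context workaround batch so the command streamer cannot arbitrate away in the middle of it: the gen9 batch disables arbitration on entry and re-enables it before the padding, while the gen8 batch only adds the trailing enable. A schematic of the bracketing pattern (opcode values below are placeholders, not the real MI encodings):

#include <stdio.h>

/* Illustrative opcodes only -- not the hardware's MI encodings. */
#define FAKE_MI_ARB_ON_OFF	0x08000000u
#define FAKE_MI_ARB_ENABLE	0x00000002u
#define FAKE_MI_ARB_DISABLE	0x00000001u
#define FAKE_MI_NOOP		0x00000000u

static unsigned int *emit_critical_wa(unsigned int *batch)
{
	*batch++ = FAKE_MI_ARB_ON_OFF | FAKE_MI_ARB_DISABLE;	/* no arbitration from here... */

	*batch++ = FAKE_MI_NOOP;	/* ...the workaround commands go here... */

	*batch++ = FAKE_MI_ARB_ON_OFF | FAKE_MI_ARB_ENABLE;	/* ...arbitration allowed again */
	return batch;
}

int main(void)
{
	unsigned int buf[8];
	unsigned int n = (unsigned int)(emit_critical_wa(buf) - buf);

	printf("emitted %u dwords\n", n);	/* prints 3 */
	return 0;
}
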
@@ -1364,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	execlists->csb_head = -1;
+	execlists->preempt = false;
 
 	/* After a GPU reset, we may have requests to replay */
 	if (!i915_modparams.enable_guc_submission && execlists->first)
@@ -1659,7 +1755,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
  */
 static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
 {
-	*cs++ = MI_NOOP;
+	/* Ensure there's always at least one preemption point per-request. */
+	*cs++ = MI_ARB_CHECK;
 	*cs++ = MI_NOOP;
 	request->wa_tail = intel_ring_offset(request, cs);
 }
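
Swapping the first padding NOOP for MI_ARB_CHECK gives every request at least one explicit arbitration point right after its breadcrumb, which is where the command streamer can act on a pending preemption; the tail still occupies exactly WA_TAIL_DWORDS dwords, so the *_breadcrumb_sz values are unchanged. A sketch of the reworked tail (opcode values and helper name are placeholders):

#include <assert.h>
#include <stdint.h>

#define WA_TAIL_DWORDS		2

/* Illustrative opcodes only -- not the hardware's MI encodings. */
#define FAKE_MI_ARB_CHECK	0x05000000u
#define FAKE_MI_NOOP		0x00000000u

static uint32_t *emit_wa_tail(uint32_t *cs)
{
	*cs++ = FAKE_MI_ARB_CHECK;	/* per-request preemption point */
	*cs++ = FAKE_MI_NOOP;		/* padding, as before */
	return cs;
}

int main(void)
{
	uint32_t ring[WA_TAIL_DWORDS];

	assert(emit_wa_tail(ring) - ring == WA_TAIL_DWORDS);
	return 0;
}
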
@@ -1680,7 +1777,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 
 	gen8_emit_wa_tail(request, cs);
 }
-
 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1708,7 +1804,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 
 	gen8_emit_wa_tail(request, cs);
 }
-
 static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
 
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)