@@ -436,57 +436,75 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *cursor =
-			rb_entry(rb, typeof(*cursor), priotree.node);
-
-		/* Can we combine this request with the current port? It has to
-		 * be the same context/ringbuffer and not have any exceptions
-		 * (e.g. GVT saying never to combine contexts).
-		 *
-		 * If we can combine the requests, we can execute both by
-		 * updating the RING_TAIL to point to the end of the second
-		 * request, and so we never need to tell the hardware about
-		 * the first.
-		 */
-		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-			/* If we are on the second port and cannot combine
-			 * this request with the last, then we are done.
-			 */
-			if (port != engine->execlist_port)
-				break;
-
-			/* If GVT overrides us we only ever submit port[0],
-			 * leaving port[1] empty. Note that we also have
-			 * to be careful that we don't queue the same
-			 * context (even though a different request) to
-			 * the second port.
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			/*
+			 * Can we combine this request with the current port?
+			 * It has to be the same context/ringbuffer and not
+			 * have any exceptions (e.g. GVT saying never to
+			 * combine contexts).
+			 *
+			 * If we can combine the requests, we can execute both
+			 * by updating the RING_TAIL to point to the end of the
+			 * second request, and so we never need to tell the
+			 * hardware about the first.
 			 */
-			if (ctx_single_port_submission(last->ctx) ||
-			    ctx_single_port_submission(cursor->ctx))
-				break;
+			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+				/*
+				 * If we are on the second port and cannot
+				 * combine this request with the last, then we
+				 * are done.
+				 */
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				/*
+				 * If GVT overrides us we only ever submit
+				 * port[0], leaving port[1] empty. Note that we
+				 * also have to be careful that we don't queue
+				 * the same context (even though a different
+				 * request) to the second port.
+				 */
+				if (ctx_single_port_submission(last->ctx) ||
+				    ctx_single_port_submission(rq->ctx)) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				GEM_BUG_ON(last->ctx == rq->ctx);
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
 
-			GEM_BUG_ON(last->ctx == cursor->ctx);
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
 
-			if (submit)
-				port_assign(port, last);
-			port++;
+			__i915_gem_request_submit(rq);
+			trace_i915_gem_request_in(rq, port_index(port, engine));
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&cursor->priotree.node);
-		cursor->priotree.priority = INT_MAX;
-
-		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port_index(port, engine));
-		last = cursor;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -610,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
 {
-	struct rb_node **p, *rb;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
 	bool first = true;
 
+	if (unlikely(engine->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	p = &root->rb_node;
-	while (*p) {
-		struct i915_priotree *pos;
-
-		rb = *p;
-		pos = rb_entry(rb, typeof(*pos), node);
-		if (pt->priority > pos->priority) {
-			p = &rb->rb_left;
-		} else {
-			p = &rb->rb_right;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
 			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will be still some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			engine->no_priolist = true;
+			goto find_priolist;
 		}
 	}
-	rb_link_node(&pt->node, rb, p);
-	rb_insert_color(&pt->node, root);
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
 
 	return first;
 }
@@ -644,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(&request->priotree, &engine->execlist_queue)) {
-		engine->execlist_first = &request->priotree.node;
+	if (insert_request(engine,
+			   &request->priotree,
+			   request->priotree.priority)) {
 		if (execlists_elsp_ready(engine))
 			tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 
+	GEM_BUG_ON(!engine->execlist_first);
+	GEM_BUG_ON(list_empty(&request->priotree.link));
+
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +794,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			continue;
 
 		pt->priority = prio;
-		if (!RB_EMPTY_NODE(&pt->node)) {
-			rb_erase(&pt->node, &engine->execlist_queue);
-			if (insert_request(pt, &engine->execlist_queue))
-				engine->execlist_first = &pt->node;
+		if (!list_empty(&pt->link)) {
+			__list_del_entry(&pt->link);
+			insert_request(engine, pt, prio);
 		}
 	}
 
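The hunks above use declarations introduced elsewhere in this patch but not reproduced here: the new struct i915_priolist and the default_priolist/no_priolist members of struct intel_engine_cs. The following is only a minimal sketch of what the dequeue and insert paths assume, not a verbatim quote of the header hunk:

/* sketch; struct rb_node is from <linux/rbtree.h>, struct list_head from <linux/list.h> */
struct i915_priolist {
	struct rb_node node;		/* keyed by priority in engine->execlist_queue */
	struct list_head requests;	/* requests at this priority, in fifo order */
	int priority;
};

/* members assumed to be added to struct intel_engine_cs: */
struct i915_priolist default_priolist;	/* preallocated level for I915_PRIORITY_NORMAL */
bool no_priolist;			/* set after kmalloc failure to fall back to fifo ordering */

/* and struct i915_priotree's rb_node is assumed to be replaced by: */
struct list_head link;			/* position within its i915_priolist */

With this split, requests of equal priority share a single rbtree node and are appended to its list in submission order, so execlists_dequeue walks the tree once per priority level rather than once per request.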