@@ -340,7 +340,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
         lockdep_assert_held(&b->rb_lock);
         GEM_BUG_ON(b->irq_wait == wait);
 
-        /* This request is completed, so remove it from the tree, mark it as
+        /*
+         * This request is completed, so remove it from the tree, mark it as
          * complete, and *then* wake up the associated task. N.B. when the
          * task wakes up, it will find the empty rb_node, discern that it
          * has already been removed from the tree and skip the serialisation
@@ -351,7 +352,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
         rb_erase(&wait->node, &b->waiters);
         RB_CLEAR_NODE(&wait->node);
 
-        wake_up_process(wait->tsk); /* implicit smp_wmb() */
+        if (wait->tsk->state != TASK_RUNNING)
+                wake_up_process(wait->tsk); /* implicit smp_wmb() */
 }
 
 static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
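
The guard added above skips wake_up_process() (and the pi_lock it takes)
when the waiter is already runnable, which is the common case once the
engine is busy. A minimal standalone sketch of the same idiom, with a
hypothetical maybe_wake() helper:

    /* Sketch: conditional wakeup of a known sleeper (hypothetical helper). */
    #include <linux/sched.h>

    static void maybe_wake(struct task_struct *tsk)
    {
            /*
             * If the task is already TASK_RUNNING the wakeup would be a
             * no-op, so test first and avoid taking tsk->pi_lock on the
             * hot path. The race is benign: a task about to sleep will
             * re-check its wakeup condition before calling schedule().
             */
            if (tsk->state != TASK_RUNNING)
                    wake_up_process(tsk); /* implicit smp_wmb() */
    }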
@@ -592,23 +594,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
         spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_complete(const struct i915_request *request)
-{
-        if (!request)
-                return false;
-
-        /*
-         * Carefully check if the request is complete, giving time for the
-         * seqno to be visible or if the GPU hung.
-         */
-        return __i915_request_irq_complete(request);
-}
-
-static struct i915_request *to_signaler(struct rb_node *rb)
-{
-        return rb_entry(rb, struct i915_request, signaling.node);
-}
-
 static void signaler_set_rtpriority(void)
 {
         struct sched_param param = { .sched_priority = 1 };
@@ -616,78 +601,26 @@ static void signaler_set_rtpriority(void)
         sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
-                                         struct i915_request *request)
-{
-        struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-        lockdep_assert_held(&b->rb_lock);
-
-        /*
-         * Wake up all other completed waiters and select the
-         * next bottom-half for the next user interrupt.
-         */
-        __intel_engine_remove_wait(engine, &request->signaling.wait);
-
-        /*
-         * Find the next oldest signal. Note that as we have
-         * not been holding the lock, another client may
-         * have installed an even older signal than the one
-         * we just completed - so double check we are still
-         * the oldest before picking the next one.
-         */
-        if (request->signaling.wait.seqno) {
-                if (request == rcu_access_pointer(b->first_signal)) {
-                        struct rb_node *rb = rb_next(&request->signaling.node);
-                        rcu_assign_pointer(b->first_signal,
-                                           rb ? to_signaler(rb) : NULL);
-                }
-
-                rb_erase(&request->signaling.node, &b->signals);
-                request->signaling.wait.seqno = 0;
-        }
-}
-
-static struct i915_request *
-get_first_signal_rcu(struct intel_breadcrumbs *b)
-{
-        /*
-         * See the big warnings for i915_gem_active_get_rcu() and similarly
-         * for dma_fence_get_rcu_safe() that explain the intricacies involved
-         * here with defeating CPU/compiler speculation and enforcing
-         * the required memory barriers.
-         */
-        do {
-                struct i915_request *request;
-
-                request = rcu_dereference(b->first_signal);
-                if (request)
-                        request = i915_request_get_rcu(request);
-
-                barrier();
-
-                if (!request || request == rcu_access_pointer(b->first_signal))
-                        return rcu_pointer_handoff(request);
-
-                i915_request_put(request);
-        } while (1);
-}
-
 static int intel_breadcrumbs_signaler(void *arg)
 {
         struct intel_engine_cs *engine = arg;
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
-        struct i915_request *request;
+        struct i915_request *rq, *n;
 
         /* Install ourselves with high priority to reduce signalling latency */
         signaler_set_rtpriority();
 
         do {
                 bool do_schedule = true;
+                LIST_HEAD(list);
+                u32 seqno;
 
                 set_current_state(TASK_INTERRUPTIBLE);
+                if (list_empty(&b->signals))
+                        goto sleep;
 
-                /* We are either woken up by the interrupt bottom-half,
+                /*
+                 * We are either woken up by the interrupt bottom-half,
                  * or by a client adding a new signaller. In both cases,
                  * the GPU seqno may have advanced beyond our oldest signal.
                  * If it has, propagate the signal, remove the waiter and
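
The new list_empty() check sits after set_current_state(TASK_INTERRUPTIBLE),
which is what makes the early "goto sleep" safe: declaring ourselves asleep
before testing the condition closes the lost-wakeup window. A self-contained
sketch of that classic kthread pattern, using a hypothetical work_pending
flag:

    /* Sketch: lost-wakeup-safe sleep loop for a kthread. */
    #include <linux/atomic.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static atomic_t work_pending; /* set by a producer before the wakeup */

    static int worker(void *arg)
    {
            do {
                    /*
                     * Mark ourselves sleeping *before* testing the
                     * condition. A concurrent wake_up_process() either
                     * observes the flag we are about to read, or resets
                     * us to TASK_RUNNING so schedule() returns at once.
                     */
                    set_current_state(TASK_INTERRUPTIBLE);
                    if (!atomic_read(&work_pending)) {
                            schedule();
                            continue;
                    }
                    __set_current_state(TASK_RUNNING);

                    atomic_set(&work_pending, 0);
                    /* ... process the available work here ... */
            } while (!kthread_should_stop());

            return 0;
    }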
@@ -695,25 +628,45 @@ static int intel_breadcrumbs_signaler(void *arg)
                  * need to wait for a new interrupt from the GPU or for
                  * a new client.
                  */
-                rcu_read_lock();
-                request = get_first_signal_rcu(b);
-                rcu_read_unlock();
-                if (signal_complete(request)) {
-                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-                                      &request->fence.flags)) {
-                                local_bh_disable();
-                                dma_fence_signal(&request->fence);
-                                GEM_BUG_ON(!i915_request_completed(request));
-                                local_bh_enable(); /* kick start the tasklets */
-                        }
+                seqno = intel_engine_get_seqno(engine);
+
+                spin_lock_irq(&b->rb_lock);
+                list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+                        u32 this = rq->signaling.wait.seqno;
+
+                        GEM_BUG_ON(!rq->signaling.wait.seqno);
 
-                        if (READ_ONCE(request->signaling.wait.seqno)) {
-                                spin_lock_irq(&b->rb_lock);
-                                __intel_engine_remove_signal(engine, request);
-                                spin_unlock_irq(&b->rb_lock);
+                        if (!i915_seqno_passed(seqno, this))
+                                break;
+
+                        if (likely(this == i915_request_global_seqno(rq))) {
+                                __intel_engine_remove_wait(engine,
+                                                           &rq->signaling.wait);
+
+                                rq->signaling.wait.seqno = 0;
+                                __list_del_entry(&rq->signaling.link);
+
+                                if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                              &rq->fence.flags)) {
+                                        list_add_tail(&rq->signaling.link,
+                                                      &list);
+                                        i915_request_get(rq);
+                                }
+                        }
+                }
+                spin_unlock_irq(&b->rb_lock);
+
+                if (!list_empty(&list)) {
+                        local_bh_disable();
+                        list_for_each_entry_safe(rq, n, &list, signaling.link) {
+                                dma_fence_signal(&rq->fence);
+                                GEM_BUG_ON(!i915_request_completed(rq));
+                                i915_request_put(rq);
                         }
+                        local_bh_enable(); /* kick start the tasklets */
 
-                        /* If the engine is saturated we may be continually
+                        /*
+                         * If the engine is saturated we may be continually
                          * processing completed requests. This angers the
                          * NMI watchdog if we never let anything else
                          * have access to the CPU. Let's pretend to be nice
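
The rewritten loop is a collect-then-signal pattern: completed requests are
unlinked onto a local list while b->rb_lock is held, and the fences are only
signalled after the lock is dropped (bracketed by local_bh_disable()/
local_bh_enable() so the woken tasklets run promptly). A generic sketch of
the pattern, with hypothetical types:

    /* Sketch: complete items under a lock, run callbacks outside it. */
    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct item {
            struct list_head link;
            void (*callback)(struct item *);
    };

    static void drain(spinlock_t *lock, struct list_head *pending)
    {
            struct item *it, *n;
            LIST_HEAD(done); /* private to this thread once populated */

            spin_lock_irq(lock);
            list_for_each_entry_safe(it, n, pending, link)
                    list_move_tail(&it->link, &done);
            spin_unlock_irq(lock);

            /* Callbacks run with no producer lock held. */
            list_for_each_entry_safe(it, n, &done, link)
                    it->callback(it);
    }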
@@ -722,9 +675,19 @@ static int intel_breadcrumbs_signaler(void *arg)
                          */
                         do_schedule = need_resched();
                 }
-                i915_request_put(request);
 
                 if (unlikely(do_schedule)) {
+                        /* Before we sleep, check for a missed seqno */
+                        if (current->state & TASK_NORMAL &&
+                            !list_empty(&b->signals) &&
+                            engine->irq_seqno_barrier &&
+                            test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+                                               &engine->irq_posted)) {
+                                engine->irq_seqno_barrier(engine);
+                                intel_engine_wakeup(engine);
+                        }
+
+sleep:
                         if (kthread_should_park())
                                 kthread_parkme();
 
@@ -739,13 +702,40 @@ static int intel_breadcrumbs_signaler(void *arg)
         return 0;
 }
 
+static void insert_signal(struct intel_breadcrumbs *b,
+                          struct i915_request *request,
+                          const u32 seqno)
+{
+        struct i915_request *iter;
+
+        lockdep_assert_held(&b->rb_lock);
+
+        /*
+         * A reasonable assumption is that we are called to add signals
+         * in sequence, as the requests are submitted for execution and
+         * assigned a global_seqno. This will be the case for the majority
+         * of internally generated signals (inter-engine signaling).
+         *
+         * Out of order waiters triggering random signaling enabling will
+         * be more problematic, but hopefully rare enough and the list
+         * small enough that the O(N) insertion sort is not an issue.
+         */
+
+        list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+                if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
+                        break;
+
+        list_add(&request->signaling.link, &iter->signaling.link);
+}
+
 void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 {
         struct intel_engine_cs *engine = request->engine;
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
         u32 seqno;
 
-        /* Note that we may be called from an interrupt handler on another
+        /*
+         * Note that we may be called from an interrupt handler on another
          * device (e.g. nouveau signaling a fence completion causing us
          * to submit a request, and so enable signaling). As such,
          * we need to make sure that all other users of b->rb_lock protect
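
insert_signal() keeps b->signals sorted by walking the list in reverse, so
the common in-order append terminates after a single i915_seqno_passed()
comparison. That comparison is the usual wrap-safe signed-difference test.
A self-contained sketch of the same insertion, with hypothetical names:

    /* Sketch: seqno-sorted list insert, O(1) for in-order arrivals. */
    #include <linux/list.h>
    #include <linux/types.h>

    struct signal_node {
            struct list_head link;
            u32 seqno;
    };

    /* Wrap-safe: true if a is at or after b, modulo 2^32. */
    static bool seqno_passed(u32 a, u32 b)
    {
            return (s32)(a - b) >= 0;
    }

    static void insert_sorted(struct list_head *head, struct signal_node *node)
    {
            struct signal_node *iter;

            /*
             * Walk backwards: a new seqno is usually the largest so far,
             * so the loop breaks on the first entry. If nothing passes
             * (or the list is empty), &iter->link degenerates to the
             * list head itself and list_add() inserts at the front.
             */
            list_for_each_entry_reverse(iter, head, link)
                    if (seqno_passed(node->seqno, iter->seqno))
                            break;

            list_add(&node->link, &iter->link);
    }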
@@ -757,17 +747,16 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
         lockdep_assert_held(&request->lock);
 
         seqno = i915_request_global_seqno(request);
-        if (!seqno)
+        if (!seqno) /* will be enabled later upon execution */
                 return;
 
-        spin_lock(&b->rb_lock);
-
         GEM_BUG_ON(request->signaling.wait.seqno);
         request->signaling.wait.tsk = b->signaler;
         request->signaling.wait.request = request;
         request->signaling.wait.seqno = seqno;
 
-        /* First add ourselves into the list of waiters, but register our
+        /*
+         * Add ourselves into the list of waiters, but register our
          * bottom-half as the signaller thread. As per usual, only the oldest
          * waiter (not just signaller) is tasked as the bottom-half waking
          * up all completed waiters after the user interrupt.
@@ -775,39 +764,9 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
          * If we are the oldest waiter, enable the irq (after which we
          * must double check that the seqno did not complete).
          */
+        spin_lock(&b->rb_lock);
+        insert_signal(b, request, seqno);
         wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
-
-        if (!__i915_request_completed(request, seqno)) {
-                struct rb_node *parent, **p;
-                bool first;
-
-                /* Now insert ourselves into the retirement ordered list of
-                 * signals on this engine. We track the oldest seqno as that
-                 * will be the first signal to complete.
-                 */
-                parent = NULL;
-                first = true;
-                p = &b->signals.rb_node;
-                while (*p) {
-                        parent = *p;
-                        if (i915_seqno_passed(seqno,
-                                              to_signaler(parent)->signaling.wait.seqno)) {
-                                p = &parent->rb_right;
-                                first = false;
-                        } else {
-                                p = &parent->rb_left;
-                        }
-                }
-                rb_link_node(&request->signaling.node, parent, p);
-                rb_insert_color(&request->signaling.node, &b->signals);
-                if (first)
-                        rcu_assign_pointer(b->first_signal, request);
-        } else {
-                __intel_engine_remove_wait(engine, &request->signaling.wait);
-                request->signaling.wait.seqno = 0;
-                wakeup = false;
-        }
-
         spin_unlock(&b->rb_lock);
 
         if (wakeup)
@@ -816,17 +775,20 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 
 void intel_engine_cancel_signaling(struct i915_request *request)
 {
+        struct intel_engine_cs *engine = request->engine;
+        struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
         GEM_BUG_ON(!irqs_disabled());
         lockdep_assert_held(&request->lock);
 
-        if (READ_ONCE(request->signaling.wait.seqno)) {
-                struct intel_engine_cs *engine = request->engine;
-                struct intel_breadcrumbs *b = &engine->breadcrumbs;
+        if (!READ_ONCE(request->signaling.wait.seqno))
+                return;
 
-                spin_lock(&b->rb_lock);
-                __intel_engine_remove_signal(engine, request);
-                spin_unlock(&b->rb_lock);
-        }
+        spin_lock(&b->rb_lock);
+        __intel_engine_remove_wait(engine, &request->signaling.wait);
+        if (fetch_and_zero(&request->signaling.wait.seqno))
+                __list_del_entry(&request->signaling.link);
+        spin_unlock(&b->rb_lock);
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
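
fetch_and_zero() reads a location and clears it in a single expression;
under b->rb_lock it lets cancellation claim the list linkage exactly once,
so a request whose wait.seqno was already zeroed by the signaler is not
unlinked twice. From memory of the driver's i915_utils.h, the helper is
approximately:

    /* Approximation of the i915 fetch_and_zero() helper macro. */
    #define fetch_and_zero(ptr) ({                                  \
            typeof(*ptr) __T = *(ptr);                              \
            *(ptr) = (typeof(*ptr))0;                               \
            __T;                                                    \
    })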
@@ -840,6 +802,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
         timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
         timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 
+        INIT_LIST_HEAD(&b->signals);
+
         /* Spawn a thread to provide a common bottom-half for all signals.
          * As this is an asynchronous interface we cannot steal the current
          * task for handling the bottom-half to the user interrupt, therefore
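
b->signals is now a plain list_head and must be initialised before first
use; INIT_LIST_HEAD() points both links back at the head, which is exactly
the state list_empty() tests for. Standard <linux/list.h> behaviour:

    #include <linux/bug.h>
    #include <linux/list.h>

    static void example_init(struct list_head *head)
    {
            INIT_LIST_HEAD(head);       /* head->next = head->prev = head */
            WARN_ON(!list_empty(head)); /* holds immediately after init */
    }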
@@ -899,8 +863,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
         /* The engines should be idle and all requests accounted for! */
         WARN_ON(READ_ONCE(b->irq_wait));
         WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-        WARN_ON(rcu_access_pointer(b->first_signal));
-        WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+        WARN_ON(!list_empty(&b->signals));
 
         if (!IS_ERR_OR_NULL(b->signaler))
                 kthread_stop(b->signaler);
@@ -913,20 +876,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
         struct intel_breadcrumbs *b = &engine->breadcrumbs;
         bool busy = false;
 
-        spin_lock_irq(&b->rb_lock);
-
         if (b->irq_wait) {
-                wake_up_process(b->irq_wait->tsk);
-                busy = true;
+                spin_lock_irq(&b->irq_lock);
+
+                if (b->irq_wait) {
+                        wake_up_process(b->irq_wait->tsk);
+                        busy = true;
+                }
+
+                spin_unlock_irq(&b->irq_lock);
         }
 
-        if (rcu_access_pointer(b->first_signal)) {
+        if (!busy && !list_empty(&b->signals)) {
                 wake_up_process(b->signaler);
                 busy = true;
         }
 
-        spin_unlock_irq(&b->rb_lock);
-
         return busy;
 }
 
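The busy check drops the engine-wide rb_lock in favour of the
double-checked locking idiom: an unlocked read of b->irq_wait filters the
idle case, and the pointer is re-checked under the narrower b->irq_lock
before it is dereferenced. A generic sketch of the pattern, with
hypothetical names:

    /* Sketch: double-checked test before a locked wakeup. */
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    static bool kick_waiter(spinlock_t *lock, struct task_struct **waiter)
    {
            bool busy = false;

            if (READ_ONCE(*waiter)) { /* cheap unlocked filter */
                    spin_lock_irq(lock);

                    /* Re-check: the waiter may have gone before we locked. */
                    if (*waiter) {
                            wake_up_process(*waiter);
                            busy = true;
                    }

                    spin_unlock_irq(lock);
            }

            return busy;
    }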