Эх сурвалжийг харах

drm/i915: Don't accumulate hangcheck score on forward progress

If the actual head has progressed forward inside a batch (request),
don't accumulate hangcheck score.

As the hangcheck score is increased only by acthd jumping backwards,
the result is that we only declare an active batch as stuck if it is
trapped inside a loop, or if the looping dominates the batch
progression so that it overcomes the bonus that forward progress gives.

v2: Improved commit message (Chris Wilson)

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
[danvet: s/active_loop/active (loop)/ as requested by Chris.]
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Mika Kuoppala 11 жил өмнө
parent
commit
f260fe7b2f

+ 2 - 0
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -229,6 +229,8 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
 		return "wait";
 		return "wait";
 	case HANGCHECK_ACTIVE:
 	case HANGCHECK_ACTIVE:
 		return "active";
 		return "active";
+	case HANGCHECK_ACTIVE_LOOP:
+		return "active (loop)";
 	case HANGCHECK_KICK:
 	case HANGCHECK_KICK:
 		return "kick";
 		return "kick";
 	case HANGCHECK_HUNG:
 	case HANGCHECK_HUNG:

+ 12 - 3
drivers/gpu/drm/i915/i915_irq.c

@@ -3189,8 +3189,14 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 tmp;
 	u32 tmp;
 
 
-	if (ring->hangcheck.acthd != acthd)
-		return HANGCHECK_ACTIVE;
+	if (acthd != ring->hangcheck.acthd) {
+		if (acthd > ring->hangcheck.max_acthd) {
+			ring->hangcheck.max_acthd = acthd;
+			return HANGCHECK_ACTIVE;
+		}
+
+		return HANGCHECK_ACTIVE_LOOP;
+	}
 
 
 	if (IS_GEN2(dev))
 	if (IS_GEN2(dev))
 		return HANGCHECK_HUNG;
 		return HANGCHECK_HUNG;
@@ -3301,8 +3307,9 @@ static void i915_hangcheck_elapsed(unsigned long data)
 				switch (ring->hangcheck.action) {
 				switch (ring->hangcheck.action) {
 				case HANGCHECK_IDLE:
 				case HANGCHECK_IDLE:
 				case HANGCHECK_WAIT:
 				case HANGCHECK_WAIT:
-					break;
 				case HANGCHECK_ACTIVE:
 				case HANGCHECK_ACTIVE:
+					break;
+				case HANGCHECK_ACTIVE_LOOP:
 					ring->hangcheck.score += BUSY;
 					ring->hangcheck.score += BUSY;
 					break;
 					break;
 				case HANGCHECK_KICK:
 				case HANGCHECK_KICK:
@@ -3322,6 +3329,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
 			 */
 			 */
 			if (ring->hangcheck.score > 0)
 			if (ring->hangcheck.score > 0)
 				ring->hangcheck.score--;
 				ring->hangcheck.score--;
+
+			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
 		}
 		}
 
 
 		ring->hangcheck.seqno = seqno;
 		ring->hangcheck.seqno = seqno;

+ 2 - 0
drivers/gpu/drm/i915/intel_ringbuffer.h

@@ -70,6 +70,7 @@ enum intel_ring_hangcheck_action {
 	HANGCHECK_IDLE = 0,
 	HANGCHECK_IDLE = 0,
 	HANGCHECK_WAIT,
 	HANGCHECK_WAIT,
 	HANGCHECK_ACTIVE,
 	HANGCHECK_ACTIVE,
+	HANGCHECK_ACTIVE_LOOP,
 	HANGCHECK_KICK,
 	HANGCHECK_KICK,
 	HANGCHECK_HUNG,
 	HANGCHECK_HUNG,
 };
 };
@@ -78,6 +79,7 @@ enum intel_ring_hangcheck_action {
 
 
 struct intel_ring_hangcheck {
 struct intel_ring_hangcheck {
 	u64 acthd;
 	u64 acthd;
+	u64 max_acthd;
 	u32 seqno;
 	u32 seqno;
 	int score;
 	int score;
 	enum intel_ring_hangcheck_action action;
 	enum intel_ring_hangcheck_action action;