@@ -26,6 +26,40 @@
 
 #include "i915_drv.h"
 
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	if (!b->irq_enabled)
+		return;
+
+	if (time_before(jiffies, b->timeout)) {
+		mod_timer(&b->hangcheck, b->timeout);
+		return;
+	}
+
+	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+
+	/* Ensure that even if the GPU hangs, we get woken up.
+	 *
+	 * However, note that if no one is waiting, we never notice
+	 * a gpu hang. Eventually, we will have to wait for a resource
+	 * held by the GPU and so trigger a hangcheck. In the most
+	 * pathological case, this will be upon memory starvation! To
+	 * prevent this, we also queue the hangcheck from the retire
+	 * worker.
+	 */
+	i915_queue_hangcheck(engine->i915);
+}
+
+static unsigned long wait_timeout(void)
+{
+	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
 static void intel_breadcrumbs_fake_irq(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -51,13 +85,6 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 */
 	engine->breadcrumbs.irq_posted = true;
 
-	/* Make sure the current hangcheck doesn't falsely accuse a just
-	 * started irq handler from missing an interrupt (because the
-	 * interrupt count still matches the stale value from when
-	 * the irq handler was disabled, many hangchecks ago).
-	 */
-	engine->breadcrumbs.irq_wakeups++;
-
 	spin_lock_irq(&engine->i915->irq_lock);
 	engine->irq_enable(engine);
 	spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,17 +125,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	}
 
 	if (!b->irq_enabled ||
-	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
 		mod_timer(&b->fake_irq, jiffies + 1);
-
-	/* Ensure that even if the GPU hangs, we get woken up.
-	 *
-	 * However, note that if no one is waiting, we never notice
-	 * a gpu hang. Eventually, we will have to wait for a resource
-	 * held by the GPU and so trigger a hangcheck. In the most
-	 * pathological case, this will be upon memory starvation!
-	 */
-	i915_queue_hangcheck(i915);
+	} else {
+		/* Ensure we never sleep indefinitely */
+		GEM_BUG_ON(!time_after(b->timeout, jiffies));
+		mod_timer(&b->hangcheck, b->timeout);
+	}
 }
 
 static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@@ -219,6 +242,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 	GEM_BUG_ON(!next && !first);
 	if (next && next != &wait->node) {
 		GEM_BUG_ON(first);
+		b->timeout = wait_timeout();
 		b->first_wait = to_wait(next);
 		smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
 		/* As there is a delay between reading the current
@@ -245,6 +269,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 
 	if (first) {
 		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+		b->timeout = wait_timeout();
 		b->first_wait = wait;
 		smp_store_mb(b->irq_seqno_bh, wait->tsk);
 		/* After assigning ourselves as the new bottom-half, we must
@@ -277,11 +302,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 	return first;
 }
 
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
-	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 {
 	return rb && to_wait(rb)->tsk->prio <= priority;
@@ -359,6 +379,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	 * the interrupt, or if we have to handle an
 	 * exception rather than a seqno completion.
 	 */
+	b->timeout = wait_timeout();
 	b->first_wait = to_wait(next);
 	smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
 	if (b->first_wait->seqno != wait->seqno)
@@ -536,6 +557,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	setup_timer(&b->fake_irq,
 		    intel_breadcrumbs_fake_irq,
 		    (unsigned long)engine);
+	setup_timer(&b->hangcheck,
+		    intel_breadcrumbs_hangcheck,
+		    (unsigned long)engine);
 
 	/* Spawn a thread to provide a common bottom-half for all signals.
 	 * As this is an asynchronous interface we cannot steal the current
@@ -560,6 +584,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
 
+	del_timer_sync(&b->hangcheck);
 	del_timer_sync(&b->fake_irq);
 }
 