intel_breadcrumbs.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*
  2. * Copyright © 2015 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/kthread.h>
  25. #include "i915_drv.h"
  26. static void intel_breadcrumbs_fake_irq(unsigned long data)
  27. {
  28. struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
  29. /*
  30. * The timer persists in case we cannot enable interrupts,
  31. * or if we have previously seen seqno/interrupt incoherency
  32. * ("missed interrupt" syndrome). Here the worker will wake up
  33. * every jiffie in order to kick the oldest waiter to do the
  34. * coherent seqno check.
  35. */
  36. rcu_read_lock();
  37. if (intel_engine_wakeup(engine))
  38. mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
  39. rcu_read_unlock();
  40. }
  41. static void irq_enable(struct intel_engine_cs *engine)
  42. {
  43. /* Enabling the IRQ may miss the generation of the interrupt, but
  44. * we still need to force the barrier before reading the seqno,
  45. * just in case.
  46. */
  47. engine->breadcrumbs.irq_posted = true;
  48. spin_lock_irq(&engine->i915->irq_lock);
  49. engine->irq_enable(engine);
  50. spin_unlock_irq(&engine->i915->irq_lock);
  51. }
  52. static void irq_disable(struct intel_engine_cs *engine)
  53. {
  54. spin_lock_irq(&engine->i915->irq_lock);
  55. engine->irq_disable(engine);
  56. spin_unlock_irq(&engine->i915->irq_lock);
  57. engine->breadcrumbs.irq_posted = false;
  58. }
  59. static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
  60. {
  61. struct intel_engine_cs *engine =
  62. container_of(b, struct intel_engine_cs, breadcrumbs);
  63. struct drm_i915_private *i915 = engine->i915;
  64. assert_spin_locked(&b->lock);
  65. if (b->rpm_wakelock)
  66. return;
  67. /* Since we are waiting on a request, the GPU should be busy
  68. * and should have its own rpm reference. For completeness,
  69. * record an rpm reference for ourselves to cover the
  70. * interrupt we unmask.
  71. */
  72. intel_runtime_pm_get_noresume(i915);
  73. b->rpm_wakelock = true;
  74. /* No interrupts? Kick the waiter every jiffie! */
  75. if (intel_irqs_enabled(i915)) {
  76. if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
  77. irq_enable(engine);
  78. b->irq_enabled = true;
  79. }
  80. if (!b->irq_enabled ||
  81. test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
  82. mod_timer(&b->fake_irq, jiffies + 1);
  83. /* Ensure that even if the GPU hangs, we get woken up.
  84. *
  85. * However, note that if no one is waiting, we never notice
  86. * a gpu hang. Eventually, we will have to wait for a resource
  87. * held by the GPU and so trigger a hangcheck. In the most
  88. * pathological case, this will be upon memory starvation!
  89. */
  90. i915_queue_hangcheck(i915);
  91. }
  92. static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
  93. {
  94. struct intel_engine_cs *engine =
  95. container_of(b, struct intel_engine_cs, breadcrumbs);
  96. assert_spin_locked(&b->lock);
  97. if (!b->rpm_wakelock)
  98. return;
  99. if (b->irq_enabled) {
  100. irq_disable(engine);
  101. b->irq_enabled = false;
  102. }
  103. intel_runtime_pm_put(engine->i915);
  104. b->rpm_wakelock = false;
  105. }
  106. static inline struct intel_wait *to_wait(struct rb_node *node)
  107. {
  108. return container_of(node, struct intel_wait, node);
  109. }
  110. static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
  111. struct intel_wait *wait)
  112. {
  113. assert_spin_locked(&b->lock);
  114. /* This request is completed, so remove it from the tree, mark it as
  115. * complete, and *then* wake up the associated task.
  116. */
  117. rb_erase(&wait->node, &b->waiters);
  118. RB_CLEAR_NODE(&wait->node);
  119. wake_up_process(wait->tsk); /* implicit smp_wmb() */
  120. }
/*
 * __intel_engine_add_wait - insert @wait into @engine's tree of waiters
 *
 * Both callers in this file hold b->lock around this call, and the helpers
 * invoked from here assert that it is held.
 *
 * If the engine's current seqno has already passed wait->seqno, the node is
 * only marked as cleared and nothing is inserted. Otherwise @wait is linked
 * into b->waiters (ordered by seqno, then by task priority for equal
 * seqnos), the already-completed waiters that precede it are removed and
 * woken, and the bottom-half (b->first_wait / b->irq_seqno_bh) is updated
 * if it changed.
 *
 * Returns true if @wait had already completed, or if @wait became the first
 * waiter in the tree (and hence the bottom-half); false otherwise.
 */
static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches since we hold the
	 * spinlock we know that the first_waiter must be delayed and can
	 * reduce some of the sequential wake up latency if we take action
	 * ourselves and wake up the completed tasks in parallel. Also, by
	 * removing stale elements in the tree, we may be able to reduce the
	 * ping-pong between the old bottom-half and ourselves as first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first; /* still true at this point */
	}

	/* Standard rbtree descent to find the link point for @wait. */
	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			/* @wait is later than parent: go right. If parent has
			 * already completed, remember it for pruning below;
			 * otherwise an older incomplete waiter exists and we
			 * cannot be first.
			 */
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->irq_seqno_bh);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			/* Some other waiter, not @wait, directly follows the
			 * completed ones: it becomes the new bottom-half.
			 */
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so need the next bottom-half to wake up.
			 *
			 * Also as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(b->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		/* Finish the last completed waiter found and every waiter
		 * ordered before it. The predecessor is fetched before the
		 * current node is erased from the tree.
		 */
		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->irq_seqno_bh, wait->tsk);
		/* After assigning ourselves as the new bottom-half, we must
		 * perform a cursory check to prevent a missed interrupt.
		 * Either we miss the interrupt whilst programming the hardware,
		 * or if there was a previous waiter (for a later seqno) they
		 * may be woken instead of us (due to the inherent race
		 * in the unlocked read of b->irq_seqno_bh in the irq handler)
		 * and so we miss the wake up.
		 */
		__intel_breadcrumbs_enable_irq(b);
	}

	/* Once a waiter is in the tree there must be a bottom-half, and it
	 * must be the leftmost node.
	 */
	GEM_BUG_ON(!b->irq_seqno_bh);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}
  228. bool intel_engine_add_wait(struct intel_engine_cs *engine,
  229. struct intel_wait *wait)
  230. {
  231. struct intel_breadcrumbs *b = &engine->breadcrumbs;
  232. bool first;
  233. spin_lock(&b->lock);
  234. first = __intel_engine_add_wait(engine, wait);
  235. spin_unlock(&b->lock);
  236. return first;
  237. }
  238. void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
  239. {
  240. mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
  241. }
  242. static inline bool chain_wakeup(struct rb_node *rb, int priority)
  243. {
  244. return rb && to_wait(rb)->tsk->prio <= priority;
  245. }
  246. static inline int wakeup_priority(struct intel_breadcrumbs *b,
  247. struct task_struct *tsk)
  248. {
  249. if (tsk == b->signaler)
  250. return INT_MIN;
  251. else
  252. return tsk->prio;
  253. }
/*
 * intel_engine_remove_wait - remove @wait from @engine's tree of waiters
 *
 * Returns immediately if the node is already marked empty, i.e. it has
 * already been taken out of the tree. Otherwise, under b->lock, the node is
 * erased. If @wait was the current bottom-half, that role is handed to the
 * next remaining waiter (which is woken), or, if none remains, the
 * bottom-half is cleared and the irq disabled.
 */
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	spin_lock(&b->lock);

	/* Re-check under the lock: the node may have been removed between
	 * the unlocked test above and acquiring the lock.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			/* The successor is fetched before the current node
			 * is erased by __intel_breadcrumbs_finish().
			 */
			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);
				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->irq_seqno_bh);
		} else {
			/* No waiters left: no bottom-half, no interrupt. */
			b->first_wait = NULL;
			WRITE_ONCE(b->irq_seqno_bh, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		/* Not the bottom-half, so we cannot be the leftmost node. */
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	/* Consistency checks on exit: @wait is no longer the bottom-half,
	 * the bottom-half (if any) is the leftmost node, and irq_seqno_bh is
	 * set exactly when the tree is non-empty.
	 */
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
	spin_unlock(&b->lock);
}
  330. static bool signal_complete(struct drm_i915_gem_request *request)
  331. {
  332. if (!request)
  333. return false;
  334. /* If another process served as the bottom-half it may have already
  335. * signalled that this wait is already completed.
  336. */
  337. if (intel_wait_complete(&request->signaling.wait))
  338. return true;
  339. /* Carefully check if the request is complete, giving time for the
  340. * seqno to be visible or if the GPU hung.
  341. */
  342. if (__i915_request_irq_complete(request))
  343. return true;
  344. return false;
  345. }
  346. static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
  347. {
  348. return container_of(rb, struct drm_i915_gem_request, signaling.node);
  349. }
  350. static void signaler_set_rtpriority(void)
  351. {
  352. struct sched_param param = { .sched_priority = 1 };
  353. sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
  354. }
/*
 * intel_breadcrumbs_signaler - body of the per-engine signaler kthread
 *
 * @arg: the struct intel_engine_cs this thread serves.
 *
 * Loops forever: while the oldest signal (b->first_signal) is complete,
 * remove it from the waiter and signal trees and drop the request reference;
 * otherwise sleep until woken, exiting when the kthread is asked to stop.
 *
 * Always returns 0.
 */
static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		/* Mark ourselves as sleeping *before* checking the condition,
		 * so that a wake up arriving between the check and schedule()
		 * is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		request = READ_ONCE(b->first_signal);
		if (signal_complete(request)) {
			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			intel_engine_remove_wait(engine,
						 &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			spin_lock(&b->lock);
			if (request == b->first_signal) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);
				b->first_signal = rb ? to_signaler(rb) : NULL;
			}
			rb_erase(&request->signaling.node, &b->signals);
			spin_unlock(&b->lock);

			/* Drop the reference on the request now that it has
			 * left the signal tree.
			 */
			i915_gem_request_unreference(request);
		} else {
			/* Nothing to signal: stop if requested, else sleep. */
			if (kthread_should_stop())
				break;

			schedule();
		}
	} while (1);
	/* We left the loop with TASK_INTERRUPTIBLE set; restore it. */
	__set_current_state(TASK_RUNNING);

	return 0;
}
/*
 * intel_engine_enable_signaling - have the signaler thread track @request
 *
 * Registers @request's embedded wait (owned by the engine's signaler task)
 * in the tree of waiters and inserts the request into the seqno-ordered tree
 * of signals, taking a reference on the request. Does nothing if signaling
 * was already enabled for this request (signaling.wait.tsk already set).
 * The signaler is woken if adding the wait reports that it should be.
 */
void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	/* Unlocked fast path: already enabled, nothing to do. */
	if (unlikely(READ_ONCE(request->signaling.wait.tsk)))
		return;

	spin_lock(&b->lock);
	/* Re-check under the lock in case we raced with another caller.
	 * wakeup must be given a value here as it is read after unlock.
	 */
	if (unlikely(request->signaling.wait.tsk)) {
		wakeup = false;
		goto unlock;
	}

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->seqno;
	i915_gem_request_reference(request);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	parent = NULL;
	first = true;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->seqno,
				      to_signaler(parent)->seqno)) {
			/* Going right even once means an older signal
			 * already exists in the tree.
			 */
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

unlock:
	spin_unlock(&b->lock);

	/* Wake the signaler only after the lock has been released. */
	if (wakeup)
		wake_up_process(b->signaler);
}
  454. int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
  455. {
  456. struct intel_breadcrumbs *b = &engine->breadcrumbs;
  457. struct task_struct *tsk;
  458. spin_lock_init(&b->lock);
  459. setup_timer(&b->fake_irq,
  460. intel_breadcrumbs_fake_irq,
  461. (unsigned long)engine);
  462. /* Spawn a thread to provide a common bottom-half for all signals.
  463. * As this is an asynchronous interface we cannot steal the current
  464. * task for handling the bottom-half to the user interrupt, therefore
  465. * we create a thread to do the coherent seqno dance after the
  466. * interrupt and then signal the waitqueue (via the dma-buf/fence).
  467. */
  468. tsk = kthread_run(intel_breadcrumbs_signaler, engine,
  469. "i915/signal:%d", engine->id);
  470. if (IS_ERR(tsk))
  471. return PTR_ERR(tsk);
  472. b->signaler = tsk;
  473. return 0;
  474. }
  475. void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
  476. {
  477. struct intel_breadcrumbs *b = &engine->breadcrumbs;
  478. if (!IS_ERR_OR_NULL(b->signaler))
  479. kthread_stop(b->signaler);
  480. del_timer_sync(&b->fake_irq);
  481. }
  482. unsigned int intel_kick_waiters(struct drm_i915_private *i915)
  483. {
  484. struct intel_engine_cs *engine;
  485. unsigned int mask = 0;
  486. /* To avoid the task_struct disappearing beneath us as we wake up
  487. * the process, we must first inspect the task_struct->state under the
  488. * RCU lock, i.e. as we call wake_up_process() we must be holding the
  489. * rcu_read_lock().
  490. */
  491. rcu_read_lock();
  492. for_each_engine(engine, i915)
  493. if (unlikely(intel_engine_wakeup(engine)))
  494. mask |= intel_engine_flag(engine);
  495. rcu_read_unlock();
  496. return mask;
  497. }
  498. unsigned int intel_kick_signalers(struct drm_i915_private *i915)
  499. {
  500. struct intel_engine_cs *engine;
  501. unsigned int mask = 0;
  502. for_each_engine(engine, i915) {
  503. if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
  504. wake_up_process(engine->breadcrumbs.signaler);
  505. mask |= intel_engine_flag(engine);
  506. }
  507. }
  508. return mask;
  509. }