i915_syncmap.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. /*
  2. * Copyright © 2017 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/slab.h>
  25. #include "i915_syncmap.h"
  26. #include "i915_gem.h" /* GEM_BUG_ON() */
  27. #include "i915_selftest.h"
  28. #define SHIFT ilog2(KSYNCMAP)
  29. #define MASK (KSYNCMAP - 1)
  30. /*
  31. * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
  32. * context id to the last u32 fence seqno waited upon from that context.
  33. * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
  34. * the root. This allows us to access the whole tree via a single pointer
  35. * to the most recently used layer. We expect fence contexts to be dense
  36. * and most reuse to be on the same i915_gem_context but on neighbouring
  37. * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
  38. * effective lookup cache. If the new lookup is not on the same leaf, we
  39. * expect it to be on the neighbouring branch.
  40. *
  41. * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
  42. * allows us to store whether a particular seqno is valid (i.e. allows us
  43. * to distinguish unset from 0).
  44. *
  45. * A branch holds an array of layer pointers, and has height > 0, and always
  46. * has at least 2 layers (either branches or leaves) below it.
  47. *
  48. * For example,
  49. * for x in
  50. * 0 1 2 0x10 0x11 0x200 0x201
  51. * 0x500000 0x500001 0x503000 0x503001
  52. * 0xE<<60:
  53. * i915_syncmap_set(&sync, x, lower_32_bits(x));
  54. * will build a tree like:
  55. * 0xXXXXXXXXXXXXXXXX
  56. * 0-> 0x0000000000XXXXXX
  57. * | 0-> 0x0000000000000XXX
  58. * | | 0-> 0x00000000000000XX
  59. * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2
  60. * | | | 1-> 0x000000000000001X 0:10, 1:11
  61. * | | 2-> 0x000000000000020X 0:200, 1:201
  62. * | 5-> 0x000000000050XXXX
  63. * | 0-> 0x000000000050000X 0:500000, 1:500001
  64. * | 3-> 0x000000000050300X 0:503000, 1:503001
  65. * e-> 0xe00000000000000X e:e
  66. */
  67. struct i915_syncmap {
  68. u64 prefix;
  69. unsigned int height;
  70. unsigned int bitmap;
  71. struct i915_syncmap *parent;
  72. /*
  73. * Following this header is an array of either seqno or child pointers:
  74. * union {
  75. * u32 seqno[KSYNCMAP];
  76. * struct i915_syncmap *child[KSYNCMAP];
  77. * };
  78. */
  79. };
  80. /**
  81. * i915_syncmap_init -- initialise the #i915_syncmap
  82. * @root: pointer to the #i915_syncmap
  83. */
  84. void i915_syncmap_init(struct i915_syncmap **root)
  85. {
  86. BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
  87. BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
  88. BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
  89. *root = NULL;
  90. }
  91. static inline u32 *__sync_seqno(struct i915_syncmap *p)
  92. {
  93. GEM_BUG_ON(p->height);
  94. return (u32 *)(p + 1);
  95. }
  96. static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
  97. {
  98. GEM_BUG_ON(!p->height);
  99. return (struct i915_syncmap **)(p + 1);
  100. }
  101. static inline unsigned int
  102. __sync_branch_idx(const struct i915_syncmap *p, u64 id)
  103. {
  104. return (id >> p->height) & MASK;
  105. }
  106. static inline unsigned int
  107. __sync_leaf_idx(const struct i915_syncmap *p, u64 id)
  108. {
  109. GEM_BUG_ON(p->height);
  110. return id & MASK;
  111. }
  112. static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
  113. {
  114. return id >> p->height >> SHIFT;
  115. }
  116. static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
  117. {
  118. GEM_BUG_ON(p->height);
  119. return id >> SHIFT;
  120. }
  121. static inline bool seqno_later(u32 a, u32 b)
  122. {
  123. return (s32)(a - b) >= 0;
  124. }
  125. /**
  126. * i915_syncmap_is_later -- compare against the last know sync point
  127. * @root: pointer to the #i915_syncmap
  128. * @id: the context id (other timeline) we are synchronising to
  129. * @seqno: the sequence number along the other timeline
  130. *
  131. * If we have already synchronised this @root timeline with another (@id) then
  132. * we can omit any repeated or earlier synchronisation requests. If the two
  133. * timelines are already coupled, we can also omit the dependency between the
  134. * two as that is already known via the timeline.
  135. *
  136. * Returns true if the two timelines are already synchronised wrt to @seqno,
  137. * false if not and the synchronisation must be emitted.
  138. */
  139. bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
  140. {
  141. struct i915_syncmap *p;
  142. unsigned int idx;
  143. p = *root;
  144. if (!p)
  145. return false;
  146. if (likely(__sync_leaf_prefix(p, id) == p->prefix))
  147. goto found;
  148. /* First climb the tree back to a parent branch */
  149. do {
  150. p = p->parent;
  151. if (!p)
  152. return false;
  153. if (__sync_branch_prefix(p, id) == p->prefix)
  154. break;
  155. } while (1);
  156. /* And then descend again until we find our leaf */
  157. do {
  158. if (!p->height)
  159. break;
  160. p = __sync_child(p)[__sync_branch_idx(p, id)];
  161. if (!p)
  162. return false;
  163. if (__sync_branch_prefix(p, id) != p->prefix)
  164. return false;
  165. } while (1);
  166. *root = p;
  167. found:
  168. idx = __sync_leaf_idx(p, id);
  169. if (!(p->bitmap & BIT(idx)))
  170. return false;
  171. return seqno_later(__sync_seqno(p)[idx], seqno);
  172. }
  173. static struct i915_syncmap *
  174. __sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
  175. {
  176. struct i915_syncmap *p;
  177. p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
  178. if (unlikely(!p))
  179. return NULL;
  180. p->parent = parent;
  181. p->height = 0;
  182. p->bitmap = 0;
  183. p->prefix = __sync_leaf_prefix(p, id);
  184. return p;
  185. }
  186. static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
  187. {
  188. unsigned int idx = __sync_leaf_idx(p, id);
  189. p->bitmap |= BIT(idx);
  190. __sync_seqno(p)[idx] = seqno;
  191. }
  192. static inline void __sync_set_child(struct i915_syncmap *p,
  193. unsigned int idx,
  194. struct i915_syncmap *child)
  195. {
  196. p->bitmap |= BIT(idx);
  197. __sync_child(p)[idx] = child;
  198. }
  199. static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
  200. {
  201. struct i915_syncmap *p = *root;
  202. unsigned int idx;
  203. if (!p) {
  204. p = __sync_alloc_leaf(NULL, id);
  205. if (unlikely(!p))
  206. return -ENOMEM;
  207. goto found;
  208. }
  209. /* Caller handled the likely cached case */
  210. GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
  211. /* Climb back up the tree until we find a common prefix */
  212. do {
  213. if (!p->parent)
  214. break;
  215. p = p->parent;
  216. if (__sync_branch_prefix(p, id) == p->prefix)
  217. break;
  218. } while (1);
  219. /*
  220. * No shortcut, we have to descend the tree to find the right layer
  221. * containing this fence.
  222. *
  223. * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
  224. * or lower layers. Leaf nodes (height = 0) contain the fences, all
  225. * other nodes (height > 0) are internal layers that point to a lower
  226. * node. Each internal layer has at least 2 descendents.
  227. *
  228. * Starting at the top, we check whether the current prefix matches. If
  229. * it doesn't, we have gone past our target and need to insert a join
  230. * into the tree, and a new leaf node for the target as a descendent
  231. * of the join, as well as the original layer.
  232. *
  233. * The matching prefix means we are still following the right branch
  234. * of the tree. If it has height 0, we have found our leaf and just
  235. * need to replace the fence slot with ourselves. If the height is
  236. * not zero, our slot contains the next layer in the tree (unless
  237. * it is empty, in which case we can add ourselves as a new leaf).
  238. * As descend the tree the prefix grows (and height decreases).
  239. */
  240. do {
  241. struct i915_syncmap *next;
  242. if (__sync_branch_prefix(p, id) != p->prefix) {
  243. unsigned int above;
  244. /* Insert a join above the current layer */
  245. next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
  246. GFP_KERNEL);
  247. if (unlikely(!next))
  248. return -ENOMEM;
  249. /* Compute the height at which these two diverge */
  250. above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
  251. above = round_up(above, SHIFT);
  252. next->height = above + p->height;
  253. next->prefix = __sync_branch_prefix(next, id);
  254. /* Insert the join into the parent */
  255. if (p->parent) {
  256. idx = __sync_branch_idx(p->parent, id);
  257. __sync_child(p->parent)[idx] = next;
  258. GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
  259. }
  260. next->parent = p->parent;
  261. /* Compute the idx of the other branch, not our id! */
  262. idx = p->prefix >> (above - SHIFT) & MASK;
  263. __sync_set_child(next, idx, p);
  264. p->parent = next;
  265. /* Ascend to the join */
  266. p = next;
  267. } else {
  268. if (!p->height)
  269. break;
  270. }
  271. /* Descend into the next layer */
  272. GEM_BUG_ON(!p->height);
  273. idx = __sync_branch_idx(p, id);
  274. next = __sync_child(p)[idx];
  275. if (!next) {
  276. next = __sync_alloc_leaf(p, id);
  277. if (unlikely(!next))
  278. return -ENOMEM;
  279. __sync_set_child(p, idx, next);
  280. p = next;
  281. break;
  282. }
  283. p = next;
  284. } while (1);
  285. found:
  286. GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
  287. __sync_set_seqno(p, id, seqno);
  288. *root = p;
  289. return 0;
  290. }
  291. /**
  292. * i915_syncmap_set -- mark the most recent syncpoint between contexts
  293. * @root: pointer to the #i915_syncmap
  294. * @id: the context id (other timeline) we have synchronised to
  295. * @seqno: the sequence number along the other timeline
  296. *
  297. * When we synchronise this @root timeline with another (@id), we also know
  298. * that we have synchronized with all previous seqno along that timeline. If
  299. * we then have a request to synchronise with the same seqno or older, we can
  300. * omit it, see i915_syncmap_is_later()
  301. *
  302. * Returns 0 on success, or a negative error code.
  303. */
  304. int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
  305. {
  306. struct i915_syncmap *p = *root;
  307. /*
  308. * We expect to be called in sequence following is_later(id), which
  309. * should have preloaded the root for us.
  310. */
  311. if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
  312. __sync_set_seqno(p, id, seqno);
  313. return 0;
  314. }
  315. return __sync_set(root, id, seqno);
  316. }
  317. static void __sync_free(struct i915_syncmap *p)
  318. {
  319. if (p->height) {
  320. unsigned int i;
  321. while ((i = ffs(p->bitmap))) {
  322. p->bitmap &= ~0u << i;
  323. __sync_free(__sync_child(p)[i - 1]);
  324. }
  325. }
  326. kfree(p);
  327. }
  328. /**
  329. * i915_syncmap_free -- free all memory associated with the syncmap
  330. * @root: pointer to the #i915_syncmap
  331. *
  332. * Either when the timeline is to be freed and we no longer need the sync
  333. * point tracking, or when the fences are all known to be signaled and the
  334. * sync point tracking is redundant, we can free the #i915_syncmap to recover
  335. * its allocations.
  336. *
  337. * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
  338. * reuse.
  339. */
  340. void i915_syncmap_free(struct i915_syncmap **root)
  341. {
  342. struct i915_syncmap *p;
  343. p = *root;
  344. if (!p)
  345. return;
  346. while (p->parent)
  347. p = p->parent;
  348. __sync_free(p);
  349. *root = NULL;
  350. }
  351. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  352. #include "selftests/i915_syncmap.c"
  353. #endif