i915_gem_timeline.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*
  2. * Copyright © 2017 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include "../i915_selftest.h"
  25. #include "i915_random.h"
  26. #include "mock_gem_device.h"
  27. #include "mock_timeline.h"
  28. struct __igt_sync {
  29. const char *name;
  30. u32 seqno;
  31. bool expected;
  32. bool set;
  33. };
  34. static int __igt_sync(struct intel_timeline *tl,
  35. u64 ctx,
  36. const struct __igt_sync *p,
  37. const char *name)
  38. {
  39. int ret;
  40. if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
  41. pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
  42. name, p->name, ctx, p->seqno, yesno(p->expected));
  43. return -EINVAL;
  44. }
  45. if (p->set) {
  46. ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
  47. if (ret)
  48. return ret;
  49. }
  50. return 0;
  51. }
  52. static int igt_sync(void *arg)
  53. {
  54. const struct __igt_sync pass[] = {
  55. { "unset", 0, false, false },
  56. { "new", 0, false, true },
  57. { "0a", 0, true, true },
  58. { "1a", 1, false, true },
  59. { "1b", 1, true, true },
  60. { "0b", 0, true, false },
  61. { "2a", 2, false, true },
  62. { "4", 4, false, true },
  63. { "INT_MAX", INT_MAX, false, true },
  64. { "INT_MAX-1", INT_MAX-1, true, false },
  65. { "INT_MAX+1", (u32)INT_MAX+1, false, true },
  66. { "INT_MAX", INT_MAX, true, false },
  67. { "UINT_MAX", UINT_MAX, false, true },
  68. { "wrap", 0, false, true },
  69. { "unwrap", UINT_MAX, true, false },
  70. {},
  71. }, *p;
  72. struct intel_timeline *tl;
  73. int order, offset;
  74. int ret;
  75. tl = mock_timeline(0);
  76. if (!tl)
  77. return -ENOMEM;
  78. for (p = pass; p->name; p++) {
  79. for (order = 1; order < 64; order++) {
  80. for (offset = -1; offset <= (order > 1); offset++) {
  81. u64 ctx = BIT_ULL(order) + offset;
  82. ret = __igt_sync(tl, ctx, p, "1");
  83. if (ret)
  84. goto out;
  85. }
  86. }
  87. }
  88. mock_timeline_destroy(tl);
  89. tl = mock_timeline(0);
  90. if (!tl)
  91. return -ENOMEM;
  92. for (order = 1; order < 64; order++) {
  93. for (offset = -1; offset <= (order > 1); offset++) {
  94. u64 ctx = BIT_ULL(order) + offset;
  95. for (p = pass; p->name; p++) {
  96. ret = __igt_sync(tl, ctx, p, "2");
  97. if (ret)
  98. goto out;
  99. }
  100. }
  101. }
  102. out:
  103. mock_timeline_destroy(tl);
  104. return ret;
  105. }
  106. static unsigned int random_engine(struct rnd_state *rnd)
  107. {
  108. return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
  109. }
  110. static int bench_sync(void *arg)
  111. {
  112. #define M (1 << 20)
  113. struct rnd_state prng;
  114. struct intel_timeline *tl;
  115. unsigned long end_time, count;
  116. u64 prng32_1M;
  117. ktime_t kt;
  118. int order, last_order;
  119. tl = mock_timeline(0);
  120. if (!tl)
  121. return -ENOMEM;
  122. /* Lookups from cache are very fast and so the random number generation
  123. * and the loop itself becomes a significant factor in the per-iteration
  124. * timings. We try to compensate the results by measuring the overhead
  125. * of the prng and subtract it from the reported results.
  126. */
  127. prandom_seed_state(&prng, i915_selftest.random_seed);
  128. count = 0;
  129. kt = ktime_get();
  130. end_time = jiffies + HZ/10;
  131. do {
  132. u32 x;
  133. /* Make sure the compiler doesn't optimise away the prng call */
  134. WRITE_ONCE(x, prandom_u32_state(&prng));
  135. count++;
  136. } while (!time_after(jiffies, end_time));
  137. kt = ktime_sub(ktime_get(), kt);
  138. pr_debug("%s: %lu random evaluations, %lluns/prng\n",
  139. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  140. prng32_1M = ktime_to_ns(kt) * M / count;
  141. /* Benchmark (only) setting random context ids */
  142. prandom_seed_state(&prng, i915_selftest.random_seed);
  143. count = 0;
  144. kt = ktime_get();
  145. end_time = jiffies + HZ/10;
  146. do {
  147. u64 id = i915_prandom_u64_state(&prng);
  148. __intel_timeline_sync_set(tl, id, 0);
  149. count++;
  150. } while (!time_after(jiffies, end_time));
  151. kt = ktime_sub(ktime_get(), kt);
  152. kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
  153. pr_info("%s: %lu random insertions, %lluns/insert\n",
  154. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  155. /* Benchmark looking up the exact same context ids as we just set */
  156. prandom_seed_state(&prng, i915_selftest.random_seed);
  157. end_time = count;
  158. kt = ktime_get();
  159. while (end_time--) {
  160. u64 id = i915_prandom_u64_state(&prng);
  161. if (!__intel_timeline_sync_is_later(tl, id, 0)) {
  162. mock_timeline_destroy(tl);
  163. pr_err("Lookup of %llu failed\n", id);
  164. return -EINVAL;
  165. }
  166. }
  167. kt = ktime_sub(ktime_get(), kt);
  168. kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
  169. pr_info("%s: %lu random lookups, %lluns/lookup\n",
  170. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  171. mock_timeline_destroy(tl);
  172. cond_resched();
  173. tl = mock_timeline(0);
  174. if (!tl)
  175. return -ENOMEM;
  176. /* Benchmark setting the first N (in order) contexts */
  177. count = 0;
  178. kt = ktime_get();
  179. end_time = jiffies + HZ/10;
  180. do {
  181. __intel_timeline_sync_set(tl, count++, 0);
  182. } while (!time_after(jiffies, end_time));
  183. kt = ktime_sub(ktime_get(), kt);
  184. pr_info("%s: %lu in-order insertions, %lluns/insert\n",
  185. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  186. /* Benchmark looking up the exact same context ids as we just set */
  187. end_time = count;
  188. kt = ktime_get();
  189. while (end_time--) {
  190. if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
  191. pr_err("Lookup of %lu failed\n", end_time);
  192. mock_timeline_destroy(tl);
  193. return -EINVAL;
  194. }
  195. }
  196. kt = ktime_sub(ktime_get(), kt);
  197. pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
  198. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  199. mock_timeline_destroy(tl);
  200. cond_resched();
  201. tl = mock_timeline(0);
  202. if (!tl)
  203. return -ENOMEM;
  204. /* Benchmark searching for a random context id and maybe changing it */
  205. prandom_seed_state(&prng, i915_selftest.random_seed);
  206. count = 0;
  207. kt = ktime_get();
  208. end_time = jiffies + HZ/10;
  209. do {
  210. u32 id = random_engine(&prng);
  211. u32 seqno = prandom_u32_state(&prng);
  212. if (!__intel_timeline_sync_is_later(tl, id, seqno))
  213. __intel_timeline_sync_set(tl, id, seqno);
  214. count++;
  215. } while (!time_after(jiffies, end_time));
  216. kt = ktime_sub(ktime_get(), kt);
  217. kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M);
  218. pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
  219. __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
  220. mock_timeline_destroy(tl);
  221. cond_resched();
  222. /* Benchmark searching for a known context id and changing the seqno */
  223. for (last_order = 1, order = 1; order < 32;
  224. ({ int tmp = last_order; last_order = order; order += tmp; })) {
  225. unsigned int mask = BIT(order) - 1;
  226. tl = mock_timeline(0);
  227. if (!tl)
  228. return -ENOMEM;
  229. count = 0;
  230. kt = ktime_get();
  231. end_time = jiffies + HZ/10;
  232. do {
  233. /* Without assuming too many details of the underlying
  234. * implementation, try to identify its phase-changes
  235. * (if any)!
  236. */
  237. u64 id = (u64)(count & mask) << order;
  238. __intel_timeline_sync_is_later(tl, id, 0);
  239. __intel_timeline_sync_set(tl, id, 0);
  240. count++;
  241. } while (!time_after(jiffies, end_time));
  242. kt = ktime_sub(ktime_get(), kt);
  243. pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
  244. __func__, count, order,
  245. (long long)div64_ul(ktime_to_ns(kt), count));
  246. mock_timeline_destroy(tl);
  247. cond_resched();
  248. }
  249. return 0;
  250. #undef M
  251. }
  252. int i915_gem_timeline_mock_selftests(void)
  253. {
  254. static const struct i915_subtest tests[] = {
  255. SUBTEST(igt_sync),
  256. SUBTEST(bench_sync),
  257. };
  258. return i915_subtests(tests, NULL);
  259. }