i915_gem_timeline.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. /*
  2. * Copyright © 2017 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include "../i915_selftest.h"
  25. #include "i915_random.h"
  26. #include "mock_gem_device.h"
  27. #include "mock_timeline.h"
/* One step in a scripted sync test: query the timeline for (ctx, seqno),
 * compare against the expected answer, then optionally record the seqno.
 */
struct __igt_sync {
	const char *name;	/* tag printed in error messages to identify the step */
	u32 seqno;		/* seqno to query (and maybe set) for the context */
	bool expected;		/* expected result of __intel_timeline_sync_is_later() */
	bool set;		/* if true, record (ctx, seqno) after the query */
};
  34. static int __igt_sync(struct intel_timeline *tl,
  35. u64 ctx,
  36. const struct __igt_sync *p,
  37. const char *name)
  38. {
  39. int ret;
  40. if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
  41. pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
  42. name, p->name, ctx, p->seqno, yesno(p->expected));
  43. return -EINVAL;
  44. }
  45. if (p->set) {
  46. ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
  47. if (ret)
  48. return ret;
  49. }
  50. return 0;
  51. }
/* Exercise the timeline sync map with a scripted sequence of queries and
 * updates, covering seqno wraparound (u32 wrap semantics around INT_MAX and
 * UINT_MAX), and context ids clustered around every power of two (the likely
 * phase-change points of the underlying radix/idr structure).
 */
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},	/* terminator: p->name == NULL ends the sequence */
	}, *p;
	struct intel_timeline *tl;
	int order, offset;
	int ret;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Pass 1: for each step, apply it across all context ids around each
	 * power of two. Each context sees the full sequence interleaved with
	 * its neighbours.
	 */
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			/* offset -1, 0, and (for order > 1) +1 around 2^order */
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_destroy(tl);

	/* Start from a fresh timeline for the second iteration order. */
	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Pass 2: inverted nesting — run the whole step sequence to completion
	 * on each context id before moving to the next.
	 */
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	/* ret holds 0 from the last successful step, or the first error */
	mock_timeline_destroy(tl);
	return ret;
}
  106. static unsigned int random_engine(struct rnd_state *rnd)
  107. {
  108. return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
  109. }
/* Benchmark the timeline sync map under several access patterns: random
 * insertion, random lookup, in-order insertion/lookup, mixed query+update,
 * and cyclic reuse of a bounded id set. Results are reported via pr_info;
 * the prng cost is measured first so it can be subtracted from the timings
 * of the random-id phases.
 */
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline *tl;
	unsigned long end_time, count;
	u64 prng32_1M;	/* cost of one prandom_u32_state() call, in ns << 20 */
	ktime_t kt;
	int order, last_order;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	/* Fixed-point (<< 20) per-call prng cost, for later subtraction */
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	/* Each u64 draw is two u32 prng calls; subtract that overhead */
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;	/* replay exactly the same number of draws */
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
			mock_timeline_destroy(tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);
	cond_resched();

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_destroy(tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);
	cond_resched();

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, seqno))
			__intel_timeline_sync_set(tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_destroy(tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	/* order follows a Fibonacci progression to probe varying set sizes */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		tl = mock_timeline(0);
		if (!tl)
			return -ENOMEM;

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(tl, id, 0);
			__intel_timeline_sync_set(tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_destroy(tl);
		cond_resched();
	}

	return 0;
}
/* Entry point for the mock (no hardware required) timeline selftests.
 * Subtests run in table order: correctness first, then the benchmarks.
 */
int i915_gem_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}