/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include "../i915_selftest.h"
#include "igt_wedge_me.h"

#include "mock_context.h"
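
/*
 * Build a batch that copies each RING_FORCE_TO_NONPRIV slot into a
 * scratch object using MI_STORE_REGISTER_MEM, so the whitelist the
 * hardware currently holds can be inspected from the CPU. Each slot
 * emits four dwords: on gen8+ the srm++ bumps the command length for
 * the wider address and the trailing zero is the upper half of the
 * 64-bit address, while on older gens that zero dword is simply an
 * MI_NOOP.
 */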
static struct drm_i915_gem_object *
read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *result;
	struct i915_request *rq;
	struct i915_vma *vma;
	const u32 base = engine->mmio_base;
	u32 srm, *cs;
	int err;
	int i;

	result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(result))
		return result;

	i915_gem_object_set_cache_level(result, I915_CACHE_LLC);

	cs = i915_gem_object_pin_map(result, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}
	memset(cs, 0xc5, PAGE_SIZE);
	i915_gem_object_unpin_map(result);

	vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_obj;

	intel_runtime_pm_get(engine->i915);
	rq = i915_request_alloc(engine, ctx);
	intel_runtime_pm_put(engine->i915);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_pin;
	}

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto err_req;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(ctx->i915) >= 8)
		srm++;

	cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_req;
	}

	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
		*cs++ = srm;
		*cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i));
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	i915_gem_object_get(result);
	i915_gem_object_set_active_reference(result);

	i915_request_add(rq);
	i915_vma_unpin(vma);

	return result;

err_req:
	i915_request_add(rq);
err_pin:
	i915_vma_unpin(vma);
err_obj:
	i915_gem_object_put(result);
	return ERR_PTR(err);
}
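
/*
 * Expected slot contents: the first w->count slots hold the whitelisted
 * register offsets, and any remaining slots should read back as
 * w->nopid (presumably the RING_NOPID offset that whitelist_build()
 * records for unused entries).
 */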
static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i)
{
	return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid;
}

static void print_results(const struct whitelist *w, const u32 *results)
{
	unsigned int i;

	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
		u32 expected = get_whitelist_reg(w, i);
		u32 actual = results[i];

		pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n",
			i, expected, actual);
	}
}
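
/*
 * Read back the live RING_NONPRIV slots and compare each one against
 * the expected whitelist. The wait for the results is wrapped in
 * igt_wedge_on_timeout() so that a dead GPU wedges the driver instead
 * of hanging the selftest forever.
 */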
static int check_whitelist(const struct whitelist *w,
			   struct i915_gem_context *ctx,
			   struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *results;
	struct igt_wedge_me wedge;
	u32 *vaddr;
	int err;
	int i;

	results = read_nonprivs(ctx, engine);
	if (IS_ERR(results))
		return PTR_ERR(results);

	err = 0;
	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
		err = i915_gem_object_set_to_cpu_domain(results, false);
	if (i915_terminally_wedged(&ctx->i915->gpu_error))
		err = -EIO;
	if (err)
		goto out_put;

	vaddr = i915_gem_object_pin_map(results, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}

	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
		u32 expected = get_whitelist_reg(w, i);
		u32 actual = vaddr[i];

		if (expected != actual) {
			print_results(w, vaddr);
			pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n",
			       i, expected, actual);

			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(results);
out_put:
	i915_gem_object_put(results);
	return err;
}
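
/*
 * Reset callbacks for check_whitelist_across_reset(): either a full
 * device reset (with this engine marked as stalled) or an individual
 * engine reset.
 */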
static int do_device_reset(struct intel_engine_cs *engine)
{
	i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL);
	return 0;
}

static int do_engine_reset(struct intel_engine_cs *engine)
{
	return i915_reset_engine(engine, NULL);
}
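
/*
 * Submit an empty request on a throwaway kernel context so that the
 * engine is no longer idling in the context under test, presumably so
 * that the reset cannot trivially preserve the whitelist by never
 * having left that context.
 */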
static int switch_to_scratch_context(struct intel_engine_cs *engine)
{
	struct i915_gem_context *ctx;
	struct i915_request *rq;

	ctx = kernel_context(engine->i915);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	intel_runtime_pm_get(engine->i915);
	rq = i915_request_alloc(engine, ctx);
	intel_runtime_pm_put(engine->i915);

	kernel_context_close(ctx);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_add(rq);

	return 0;
}
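
/*
 * The core of the test: the whitelist must be valid before the reset,
 * must be preserved in the original context across the reset, and must
 * also be applied to a context created after the reset.
 */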
static int check_whitelist_across_reset(struct intel_engine_cs *engine,
					int (*reset)(struct intel_engine_cs *),
					const struct whitelist *w,
					const char *name)
{
	struct i915_gem_context *ctx;
	int err;

	ctx = kernel_context(engine->i915);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	err = check_whitelist(w, ctx, engine);
	if (err) {
		pr_err("Invalid whitelist *before* %s reset!\n", name);
		goto out;
	}

	err = switch_to_scratch_context(engine);
	if (err)
		goto out;

	err = reset(engine);
	if (err) {
		pr_err("%s reset failed\n", name);
		goto out;
	}

	err = check_whitelist(w, ctx, engine);
	if (err) {
		pr_err("Whitelist not preserved in context across %s reset!\n",
		       name);
		goto out;
	}

	kernel_context_close(ctx);

	ctx = kernel_context(engine->i915);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	err = check_whitelist(w, ctx, engine);
	if (err) {
		pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
		       name);
		goto out;
	}

out:
	kernel_context_close(ctx);
	return err;
}
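
/*
 * Check that RING_NONPRIV survives both engine and full-device resets
 * (where the hardware supports them). The I915_RESET_BACKOFF and
 * I915_RESET_ENGINE bits are set around the test, presumably to keep
 * the background reset machinery from racing with the resets we
 * trigger by hand.
 */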
static int live_reset_whitelist(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct i915_gpu_error *error = &i915->gpu_error;
	struct whitelist w;
	int err = 0;

	/* If we reset the gpu, we should not lose the RING_NONPRIV */

	if (!engine)
		return 0;

	if (!whitelist_build(engine, &w))
		return 0;

	pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count);

	set_bit(I915_RESET_BACKOFF, &error->flags);
	set_bit(I915_RESET_ENGINE + engine->id, &error->flags);

	if (intel_has_reset_engine(i915)) {
		err = check_whitelist_across_reset(engine,
						   do_engine_reset, &w,
						   "engine");
		if (err)
			goto out;
	}

	if (intel_has_gpu_reset(i915)) {
		err = check_whitelist_across_reset(engine,
						   do_device_reset, &w,
						   "device");
		if (err)
			goto out;
	}

out:
	clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
	clear_bit(I915_RESET_BACKOFF, &error->flags);
	return err;
}
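
/*
 * Live selftest entry point, typically invoked through the
 * i915.selftests module parameter on a CONFIG_DRM_I915_SELFTEST build.
 * If the GPU is already terminally wedged there is nothing to reset,
 * so report success and skip.
 */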
int intel_workarounds_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_reset_whitelist),
	};
	int err;

	if (i915_terminally_wedged(&i915->gpu_error))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);
	err = i915_subtests(tests, i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return err;
}