/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of an
 * opaque GPU object which is referenced at times of context saves and restores.
 * With RC6 enabled, the context is also referenced as the GPU enters and exits
 * RC6 (the GPU has its own internal power context, except on gen5). Though
 * something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU state.
 * The default context only exists to give the GPU some offset to load as the
 * current context, in order to invoke a save of the context we actually care
 * about. In fact, the code could likely be constructed, albeit in a more
 * complicated fashion, to never use the default context, though that limits
 * the driver's ability to swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These contexts
 * store GPU state, and thus allow GPU clients to not re-emit state (and
 * potentially query certain state) at any time. The kernel driver makes
 * certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
 *                                          refcount  pincount  active
 * S0: initial state                            0         0        0
 * S1: context created                          1         0        0
 * S2: context is currently running             2         1        X
 * S3: GPU referenced, but not current          2         0        1
 * S4: context is current, but destroyed        1         1        0
 * S5: like S3, but destroyed                   1         0        1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits execbuf with context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and
 *  is on the active list waiting for the next context switch to occur. Until
 *  this happens, the object must remain at the same gtt offset. It is
 *  therefore possible to destroy a context while it is still active.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

/* This is a HW constraint. The value below is the largest known requirement
 * I've seen in a spec to date, and that was a workaround for a non-shipping
 * part. It should be safe to decrease this, but it's more future proof as is.
 */
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN 4096
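
/* Minimum GGTT alignment required for a legacy HW context image: 64 KiB on
 * gen6, a single 4 KiB page on gen7+. */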
static size_t get_context_alignment(struct drm_device *dev)
{
	if (IS_GEN6(dev))
		return GEN6_CONTEXT_ALIGN;

	return GEN7_CONTEXT_ALIGN;
}
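
/* Read the per-generation context image size from the hardware registers, or
 * use the fixed totals on Haswell and gen8, so the backing object can be sized
 * correctly. */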
static int get_context_size(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;
	u32 reg;

	switch (INTEL_INFO(dev)->gen) {
	case 6:
		reg = I915_READ(CXT_SIZE);
		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 7:
		reg = I915_READ(GEN7_CXT_SIZE);
		if (IS_HASWELL(dev))
			ret = HSW_CXT_TOTAL_SIZE;
		else
			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 8:
		ret = GEN8_CXT_TOTAL_SIZE;
		break;
	default:
		BUG();
	}

	return ret;
}
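
/* Unbind any VMAs left on the context's PPGTT inactive list. Used from the
 * final unreference so that imported/shared objects whose destructor never ran
 * (their handles were simply closed) do not leave stale bindings behind. */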
static void i915_gem_context_clean(struct intel_context *ctx)
{
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
	struct i915_vma *vma, *next;

	if (!ppgtt)
		return;

	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
				 vm_link) {
		if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
			break;
	}
}
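
/* Final kref release callback for a context: tear down the execlists state (if
 * any), clean up remaining VMAs, drop the PPGTT and the legacy RCS state
 * object, and free the structure. */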
void i915_gem_context_free(struct kref *ctx_ref)
{
	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);

	trace_i915_context_free(ctx);

	if (i915.enable_execlists)
		intel_lr_context_free(ctx);

	/*
	 * This context is going away and we need to remove all VMAs still
	 * around. This is to handle imported shared objects for which
	 * destructor did not run when their handles were closed.
	 */
	i915_gem_context_clean(ctx);

	i915_ppgtt_put(ctx->ppgtt);

	if (ctx->legacy_hw_ctx.rcs_state)
		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
	list_del(&ctx->link);
	kfree(ctx);
}
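
/* Allocate the GEM object that backs a legacy HW context image. On Ivybridge
 * the object is additionally set to the L3+LLC cache level (see the comment in
 * the body for why only there). */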
struct drm_i915_gem_object *
i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return ERR_PTR(-ENOMEM);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(dev)) {
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
		/* Failure shouldn't ever happen this early */
		if (WARN_ON(ret)) {
			drm_gem_object_unreference(&obj->base);
			return ERR_PTR(ret);
		}
	}

	return obj;
}
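
/* Allocate and initialise a new intel_context: take the initial reference,
 * allocate the legacy backing object when HW contexts are in use, and, for
 * user-created contexts, register the context in the file's idr to obtain the
 * handle that is returned to userspace. */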
static struct intel_context *
__create_hw_context(struct drm_device *dev,
		    struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);

	kref_init(&ctx->ref);
	list_add_tail(&ctx->link, &dev_priv->context_list);
	ctx->i915 = dev_priv;

	if (dev_priv->hw_context_size) {
		struct drm_i915_gem_object *obj =
				i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto err_out;
		}
		ctx->legacy_hw_ctx.rcs_state = obj;
	}

	/* Default context will never have a file_priv */
	if (file_priv != NULL) {
		ret = idr_alloc(&file_priv->context_idr, ctx,
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
		if (ret < 0)
			goto err_out;
	} else
		ret = DEFAULT_CONTEXT_HANDLE;

	ctx->file_priv = file_priv;
	ctx->user_handle = ret;
	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP. */
	ctx->remap_slice = (1 << NUM_L3_SLICES(dev)) - 1;

	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;

	return ctx;

err_out:
	i915_gem_context_unreference(ctx);
	return ERR_PTR(ret);
}

/**
 * The default context needs to exist per ring that uses contexts. It stores the
 * context state of the GPU for applications that don't utilize HW contexts, as
 * well as an idle case.
 */
static struct intel_context *
i915_gem_create_context(struct drm_device *dev,
			struct drm_i915_file_private *file_priv)
{
	const bool is_global_default_ctx = file_priv == NULL;
	struct intel_context *ctx;
	int ret = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));

	ctx = __create_hw_context(dev, file_priv);
	if (IS_ERR(ctx))
		return ctx;

	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
		/* We may need to do things with the shrinker which
		 * require us to immediately switch back to the default
		 * context. This can cause a problem as pinning the
		 * default context also requires GTT space which may not
		 * be available. To avoid this we always pin the default
		 * context.
		 */
		ret = i915_gem_obj_ggtt_pin(ctx->legacy_hw_ctx.rcs_state,
					    get_context_alignment(dev), 0);
		if (ret) {
			DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
			goto err_destroy;
		}
	}

	if (USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);

		if (IS_ERR_OR_NULL(ppgtt)) {
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
			ret = PTR_ERR(ppgtt);
			goto err_unpin;
		}

		ctx->ppgtt = ppgtt;
	}

	trace_i915_context_create(ctx);

	return ctx;

err_unpin:
	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state)
		i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
err_destroy:
	/* file_priv is NULL while creating the global default context */
	if (file_priv)
		idr_remove(&file_priv->context_idr, ctx->user_handle);
	i915_gem_context_unreference(ctx);
	return ERR_PTR(ret);
}
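
/* Release the last_context pin taken for @engine: in execlists mode via
 * intel_lr_context_unpin(), otherwise unpin the legacy RCS state object and
 * drop the context reference. */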
static void i915_gem_context_unpin(struct intel_context *ctx,
				   struct intel_engine_cs *engine)
{
	if (i915.enable_execlists) {
		intel_lr_context_unpin(ctx, engine);
	} else {
		if (engine->id == RCS && ctx->legacy_hw_ctx.rcs_state)
			i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
		i915_gem_context_unreference(ctx);
	}
}
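
/* Clean up context state after a GPU reset: reset the execlists contexts,
 * drop every engine's notion of its last context, and force the kernel
 * context state to be reinitialised on the next switch. */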
void i915_gem_context_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	if (i915.enable_execlists) {
		struct intel_context *ctx;

		list_for_each_entry(ctx, &dev_priv->context_list, link)
			intel_lr_context_reset(dev_priv, ctx);
	}

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_engine_cs *engine = &dev_priv->engine[i];

		if (engine->last_context) {
			i915_gem_context_unpin(engine->last_context, engine);
			engine->last_context = NULL;
		}
	}

	/* Force the GPU state to be reinitialised on enabling */
	dev_priv->kernel_context->legacy_hw_ctx.initialized = false;
}
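
/* One-time initialisation: decide between execlists, legacy HW contexts or
 * "fake" contexts, and create the global default (kernel) context. */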
int i915_gem_context_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_context *ctx;

	/* Init should only be called once per module load. Eventually the
	 * restriction on the context_disabled check can be loosened. */
	if (WARN_ON(dev_priv->kernel_context))
		return 0;

	if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) {
		if (!i915.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

	if (i915.enable_execlists) {
		/* NB: intentionally left blank. We will allocate our own
		 * backing objects as we need them, thank you very much */
		dev_priv->hw_context_size = 0;
	} else if (HAS_HW_CONTEXTS(dev)) {
		dev_priv->hw_context_size = round_up(get_context_size(dev), 4096);
		if (dev_priv->hw_context_size > (1<<20)) {
			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
					 dev_priv->hw_context_size);
			dev_priv->hw_context_size = 0;
		}
	}

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context (error %ld)\n",
			  PTR_ERR(ctx));
		return PTR_ERR(ctx);
	}

	dev_priv->kernel_context = ctx;

	DRM_DEBUG_DRIVER("%s context support initialized\n",
			 i915.enable_execlists ? "LR" :
			 dev_priv->hw_context_size ? "HW" : "fake");
	return 0;
}
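
/* Teardown counterpart to i915_gem_context_init(): in legacy mode, reset the
 * GPU so it stops referencing the default context image, then drop every
 * engine's last_context and finally release the kernel context itself. */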
void i915_gem_context_fini(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_context *dctx = dev_priv->kernel_context;
	int i;

	if (dctx->legacy_hw_ctx.rcs_state) {
		/* The only known way to stop the gpu from accessing the hw context is
		 * to reset it. Do this as the very last operation to avoid confusing
		 * other code, leading to spurious errors. */
		intel_gpu_reset(dev, ALL_ENGINES);

		/* When default context is created and switched to, base object refcount
		 * will be 2 (+1 from object creation and +1 from do_switch()).
		 * i915_gem_context_fini() will be called after gpu_idle() has switched
		 * to default context. So we need to unreference the base object once
		 * to offset the do_switch part, so that i915_gem_context_unreference()
		 * can then free the base object correctly. */
		WARN_ON(!dev_priv->engine[RCS].last_context);

		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
	}

	for (i = I915_NUM_ENGINES; --i >= 0;) {
		struct intel_engine_cs *engine = &dev_priv->engine[i];

		if (engine->last_context) {
			i915_gem_context_unpin(engine->last_context, engine);
			engine->last_context = NULL;
		}
	}

	i915_gem_context_unreference(dctx);
	dev_priv->kernel_context = NULL;
}
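
/* Run the per-engine context initialisation for @req: in execlists mode call
 * the engine's init_context hook directly, otherwise go through a full legacy
 * context switch. */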
int i915_gem_context_enable(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	int ret;

	if (i915.enable_execlists) {
		if (engine->init_context == NULL)
			return 0;

		ret = engine->init_context(req);
	} else
		ret = i915_switch_context(req);

	if (ret) {
		DRM_ERROR("ring init context: %d\n", ret);
		return ret;
	}

	return 0;
}

static int context_idr_cleanup(int id, void *p, void *data)
{
	struct intel_context *ctx = p;

	i915_gem_context_unreference(ctx);
	return 0;
}

int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct intel_context *ctx;

	idr_init(&file_priv->context_idr);

	mutex_lock(&dev->struct_mutex);
	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);

	if (IS_ERR(ctx)) {
		idr_destroy(&file_priv->context_idr);
		return PTR_ERR(ctx);
	}

	return 0;
}

void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
	idr_destroy(&file_priv->context_idr);
}

struct intel_context *
i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
{
	struct intel_context *ctx;

	ctx = (struct intel_context *)idr_find(&file_priv->context_idr, id);
	if (!ctx)
		return ERR_PTR(-ENOENT);

	return ctx;
}
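
/* Emit the MI_SET_CONTEXT command (plus the workarounds that must surround it)
 * into the ring for @req, pointing the hardware at the new context image. */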
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
	struct intel_engine_cs *engine = req->engine;
	u32 flags = hw_flags | MI_MM_SPACE_GTT;
	const int num_rings =
		/* Use an extended w/a on ivb+ if signalling from other rings */
		i915_semaphore_is_enabled(engine->dev) ?
		hweight32(INTEL_INFO(engine->dev)->ring_mask) - 1 :
		0;
	int len, ret;

	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
	 * explicitly, so we rely on the value at ring init, stored in
	 * itlb_before_ctx_switch.
	 */
	if (IS_GEN6(engine->dev)) {
		ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
		if (ret)
			return ret;
	}

	/* These flags are for resource streamer on HSW+ */
	if (IS_HASWELL(engine->dev) || INTEL_INFO(engine->dev)->gen >= 8)
		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
	else if (INTEL_INFO(engine->dev)->gen < 8)
		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);

	len = 4;
	if (INTEL_INFO(engine->dev)->gen >= 7)
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	ret = intel_ring_begin(req, len);
	if (ret)
		return ret;

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (INTEL_INFO(engine->dev)->gen >= 7) {
		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
		if (num_rings) {
			struct intel_engine_cs *signaller;

			intel_ring_emit(engine,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, to_i915(engine->dev)) {
				if (signaller == engine)
					continue;

				intel_ring_emit_reg(engine,
						    RING_PSMI_CTL(signaller->mmio_base));
				intel_ring_emit(engine,
						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}
		}
	}

	intel_ring_emit(engine, MI_NOOP);
	intel_ring_emit(engine, MI_SET_CONTEXT);
	intel_ring_emit(engine,
			i915_gem_obj_ggtt_offset(req->ctx->legacy_hw_ctx.rcs_state) |
			flags);
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	intel_ring_emit(engine, MI_NOOP);

	if (INTEL_INFO(engine->dev)->gen >= 7) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			intel_ring_emit(engine,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, to_i915(engine->dev)) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				intel_ring_emit_reg(engine, last_reg);
				intel_ring_emit(engine,
						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}

			/* Insert a delay before the next switch! */
			intel_ring_emit(engine,
					MI_STORE_REGISTER_MEM |
					MI_SRM_LRM_GLOBAL_GTT);
			intel_ring_emit_reg(engine, last_reg);
			intel_ring_emit(engine, engine->scratch.gtt_offset);
			intel_ring_emit(engine, MI_NOOP);
		}
		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE);
	}

	intel_ring_advance(engine);

	return ret;
}
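
/* Helpers deciding whether a legacy RCS switch can be skipped entirely, and
 * whether a page-directory load is required before or after MI_SET_CONTEXT
 * for the target context. */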
static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
				   struct intel_context *to)
{
	if (to->remap_slice)
		return false;

	if (!to->legacy_hw_ctx.initialized)
		return false;

	if (to->ppgtt &&
	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
		return false;

	return to == engine->last_context;
}

static bool
needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
{
	if (!to->ppgtt)
		return false;

	if (engine->last_context == to &&
	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
		return false;

	if (engine->id != RCS)
		return true;

	if (INTEL_INFO(engine->dev)->gen < 8)
		return true;

	return false;
}

static bool
needs_pd_load_post(struct intel_context *to, u32 hw_flags)
{
	if (!to->ppgtt)
		return false;

	if (!IS_GEN8(to->i915))
		return false;

	if (hw_flags & MI_RESTORE_INHIBIT)
		return true;

	return false;
}
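
/* Perform a full legacy context switch on the render ring: pin the target
 * context image, emit any required PD load and MI_SET_CONTEXT, retire the
 * previous context's pin and reference, then handle L3 remapping and
 * first-use initialisation of the new context. */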
static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	struct intel_context *to = req->ctx;
	struct intel_engine_cs *engine = req->engine;
	struct intel_context *from;
	u32 hw_flags;
	int ret, i;

	if (skip_rcs_switch(engine, to))
		return 0;

	/* Trying to pin first makes error handling easier. */
	ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
				    get_context_alignment(engine->dev),
				    0);
	if (ret)
		return ret;

	/*
	 * Pin can switch back to the default context if we end up calling into
	 * evict_everything - as a last ditch gtt defrag effort that also
	 * switches to the default context. Hence we need to reload from here.
	 *
	 * XXX: Doing so is painfully broken!
	 */
	from = engine->last_context;

	/*
	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
	 * that thanks to write = false in this call and us not setting any gpu
	 * write domains when putting a context object onto the active list
	 * (when switching away from it), this won't block.
	 *
	 * XXX: We need a real interface to do this instead of trickery.
	 */
	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
	if (ret)
		goto unpin_out;

	if (needs_pd_load_pre(engine, to)) {
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context."*/
		trace_switch_mm(engine, to);
		ret = to->ppgtt->switch_mm(to->ppgtt, req);
		if (ret)
			goto unpin_out;
	}

	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occur when this occurs. */
		hw_flags = MI_RESTORE_INHIBIT;
	else if (to->ppgtt &&
		 intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;

	/* We should never emit switch_mm more than once */
	WARN_ON(needs_pd_load_pre(engine, to) &&
		needs_pd_load_post(to, hw_flags));

	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			goto unpin_out;
	}

	/* The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. In fact, the below code
	 * is a bit suboptimal because the retiring can occur simply after the
	 * MI_SET_CONTEXT instead of when the next seqno has completed.
	 */
	if (from != NULL) {
		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
		 * whole damn pipeline, we don't need to explicitly mark the
		 * object dirty. The only exception is that the context must be
		 * correct in case the object gets swapped out. Ideally we'd be
		 * able to defer doing this until we know the object would be
		 * swapped, but there is no way to do that yet.
		 */
		from->legacy_hw_ctx.rcs_state->dirty = 1;

		/* obj is kept alive until the next request by its active ref */
		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
		i915_gem_context_unreference(from);
	}
	i915_gem_context_reference(to);
	engine->last_context = to;

	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
	if (needs_pd_load_post(to, hw_flags)) {
		trace_switch_mm(engine, to);
		ret = to->ppgtt->switch_mm(to->ppgtt, req);
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

	if (to->ppgtt)
		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

		ret = i915_gem_l3_remap(req, i);
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

	if (!to->legacy_hw_ctx.initialized) {
		if (engine->init_context) {
			ret = engine->init_context(req);
			if (ret)
				return ret;
		}
		to->legacy_hw_ctx.initialized = true;
	}

	return 0;

unpin_out:
	i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
	return ret;
}

/**
 * i915_switch_context() - perform a GPU context switch.
 * @req: request for which we'll execute the context switch
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 on create and destroy. If the context is in use by the GPU,
 * it will have a refcount > 1. This allows us to destroy the context abstract
 * object while letting the normal object tracking destroy the backing BO.
 *
 * This function should not be used in execlists mode. Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
 */
int i915_switch_context(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_private *dev_priv = req->i915;

	WARN_ON(i915.enable_execlists);
	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));

	if (engine->id != RCS ||
	    req->ctx->legacy_hw_ctx.rcs_state == NULL) {
		struct intel_context *to = req->ctx;

		if (needs_pd_load_pre(engine, to)) {
			int ret;

			trace_switch_mm(engine, to);
			ret = to->ppgtt->switch_mm(to->ppgtt, req);
			if (ret)
				return ret;

			/* Doing a PD load always reloads the page dirs */
			to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		if (to != engine->last_context) {
			i915_gem_context_reference(to);
			if (engine->last_context)
				i915_gem_context_unreference(engine->last_context);
			engine->last_context = to;
		}

		return 0;
	}

	return do_rcs_switch(req);
}

static bool contexts_enabled(struct drm_device *dev)
{
	return i915.enable_execlists || to_i915(dev)->hw_context_size;
}
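
/* The ioctl handlers below expose contexts to userspace: create, destroy, and
 * get/set of per-context parameters (hang ban period, no-zeromap behaviour
 * and, read-only, the GTT size). */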
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct intel_context *ctx;
	int ret;

	if (!contexts_enabled(dev))
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	args->ctx_id = ctx->user_handle;
	DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id);

	return 0;
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct intel_context *ctx;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
		return -ENOENT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_get(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
	i915_gem_context_unreference(ctx);
	mutex_unlock(&dev->struct_mutex);

	DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
	return 0;
}

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct intel_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_get(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		args->value = ctx->hang_stats.ban_period_seconds;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
			args->value = to_i915(dev)->ggtt.base.total;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct intel_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_get(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		if (args->size)
			ret = -EINVAL;
		else if (args->value < ctx->hang_stats.ban_period_seconds &&
			 !capable(CAP_SYS_ADMIN))
			ret = -EPERM;
		else
			ctx->hang_stats.ban_period_seconds = args->value;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}