execlist.c

/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}

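/*
 * Promote the pending execlist slot to running: the pending slot becomes
 * the running slot, pending is cleared, and the running context is pointed
 * at element 0 of the new running slot (when a context was running).
 */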
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}

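/*
 * Refresh the virtual EXECLIST_STATUS register so the guest sees which
 * execlist element is active/valid, which context is running, and whether
 * the submission queue is full.
 */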
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}

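/*
 * Append a context status event to the vGPU's virtual CSB: advance the
 * virtual write pointer, store the event in the CSB buffer registers,
 * mirror both into the guest HWSP, and (unless deferred via
 * trigger_interrupt_later) inject the ring's context-switch interrupt.
 */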
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[ring_id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
			write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa +
			intel_hws_csb_write_index(dev_priv) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}

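/*
 * Emulate schedule-out of @ctx from the running execlist slot: emit an
 * element-switch event if the other element keeps running; otherwise
 * switch the virtual slot and emit context-complete/active-to-idle,
 * followed by idle-to-active when a pending execlist is waiting.
 */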
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context,"
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
	/*
	 * ctx1 is not valid, ctx == ctx0
	 * ctx1 is valid, ctx1 == ctx
	 *	--> last element is finished
	 * emulate:
	 *	active-to-idle if there is *no* pending execlist
	 *	context-complete if there *is* pending execlist
	 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

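/*
 * Pick the next free virtual execlist slot based on the write pointer in
 * the virtual EXECLIST_STATUS register, or NULL when the queue is full.
 */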
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}

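/*
 * Emulate a guest ELSP submission: copy the two context descriptors into
 * the next free slot, then report idle-to-active (nothing running),
 * lite-restore + preempted (new head continues the running context), or
 * simply queue the slot as pending.
 */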
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this submitted pair the running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> a pending execlist exists, but there is no preemption
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}

#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

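/*
 * Workload prepare callback: when the workload carries an ELSP write to
 * emulate, fetch both context descriptors from the saved ELSP dwords and
 * run the schedule-in emulation for this ring.
 */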
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}
	return 0;
}

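/*
 * Workload complete callback: unless the engine is resetting, the workload
 * failed, or the next queued workload reuses the same context (lite-restore),
 * emulate schedule-out of the finished context, then release the workload.
 */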
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
		goto out;

	if (!list_empty(workload_q_head(vgpu, ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_vgpu_destroy_workload(workload);
	return ret;
}

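/*
 * Build a workload from one context descriptor, wire up the execlist
 * prepare/complete callbacks, and queue it for the vGPU scheduler.
 */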
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}

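/*
 * Entry point for a guest ELSP submission on @ring_id: validate the two
 * descriptors held in the virtual ELSP dwords and submit a workload for
 * each valid one; only the first submission emulates schedule-in.
 */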
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}

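/*
 * Reset the per-ring virtual execlist state and the virtual CSB pointer
 * register (read pointer 0, write pointer 0x7).
 */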
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

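/* Free the per-engine ring scan buffers for the engines in @engine_mask. */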
static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
	unsigned int tmp;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}

static void reset_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	unsigned int tmp;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine->id);
}

static int init_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}

const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};