vc4_gem.c

/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"
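
/* (Re)arms the hangcheck timer to fire roughly 100ms out.  Called when
 * a job is kicked off, and again each time the hangcheck sees the job
 * still making progress.
 */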
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}
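
/* Snapshot of the GPU and job state taken when a hang is detected, kept
 * around until userspace collects it through the GET_HANG_STATE ioctl.
 */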
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	mutex_lock(&dev->struct_mutex);
	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference(state->bo[i]);
	mutex_unlock(&dev->struct_mutex);

	kfree(state);
}
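
/* Hands the saved hang state out to userspace: copies the register
 * snapshot and creates fresh GEM handles for the BOs that were part of
 * the hung job, so a debugger can map and inspect their contents.
 */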
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);
		if (ret) {
			state->bo_count = i;
			goto err;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

	kfree(bo_state);

err_free:
	vc4_free_hang_state(dev, kernel_state);

err:
	return ret;
}
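
/* Called from the reset worker with the GPU hung: walks the failing
 * job's BO lists, takes an extra reference on each BO, and records the
 * V3D control-list and debug registers for later retrieval through the
 * GET_HANG_STATE ioctl.
 */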
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec;
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, unref_list_count;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec = vc4_first_job(vc4);
	if (!exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	unref_list_count = 0;
	list_for_each_entry(bo, &exec->unref_list, unref_head)
		unref_list_count++;

	state->bo_count = exec->bo_count + unref_list_count;
	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
				   GFP_ATOMIC);
	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	for (i = 0; i < exec->bo_count; i++) {
		drm_gem_object_reference(&exec->bo[i]->base);
		kernel_state->bo[i] = &exec->bo[i]->base;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		drm_gem_object_reference(&bo->base.base);
		kernel_state->bo[i] = &bo->base.base;
		i++;
	}

	state->start_bin = exec->ct0ca;
	state->start_render = exec->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);

	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");
	vc4_v3d_set_power(vc4, false);
	vc4_v3d_set_power(vc4, true);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}
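
/* Worker scheduled by the hangcheck timer.  Runs in process context
 * because saving the hang state and power-cycling the V3D block can
 * sleep, which the timer callback itself is not allowed to do.
 */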
static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;

	/* If idle, we can stop watching for hangs. */
	if (list_empty(&vc4->job_list))
		return;

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if (ct0ca != vc4->hangcheck.last_ct0ca ||
	    ct1ca != vc4->hangcheck.last_ct1ca) {
		vc4->hangcheck.last_ct0ca = ct0ca;
		vc4->hangcheck.last_ct1ca = ct1ca;
		vc4_queue_hangcheck(dev);
		return;
	}

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}
static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}
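
/* Waits for vc4->finished_seqno to catch up to the requested seqno.  A
 * timeout_ns of 0 only polls the current state, while ~0ull means wait
 * without any timeout.
 */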
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);

	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("timeout waiting for render thread idle\n");

	return ret;
}
static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}
/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_job(vc4);

	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Disable the binner's pre-loaded overflow memory address */
	V3D_WRITE(V3D_BPOA, 0);
	V3D_WRITE(V3D_BPOS, 0);

	if (exec->ct0ca != exec->ct0ea)
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}
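
/* Stamps every BO used by the job (both the ones looked up from user
 * handles and the temporary ones on the unref list) with the job's
 * seqno, so that a later VC4_WAIT_BO on any of them waits for this job
 * to finish.
 */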
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}
}
/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's frame done interrupt
	 * occurs.
	 */
	if (vc4_first_job(vc4) == exec) {
		vc4_submit_next_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}
/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
			   GFP_KERNEL);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}
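
/* Copies the binner command list, shader records, and uniforms in from
 * userspace, stashes them in a temporary kernel buffer so they can be
 * validated, and allocates the BO that the validated copies will
 * actually execute from.
 */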
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
					  sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = kmalloc(temp_size, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in bin cl\n");
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in shader recs\n");
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in uniforms cl\n");
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (!bo) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = -ENOMEM;
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);

fail:
	kfree(temp);
	return ret;
}
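
/* Tears down an exec once the hardware is done with it (or submission
 * failed): drops the references on every BO the job looked up, frees
 * the temporary BOs on the unref list, and frees the exec itself.
 */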
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	unsigned i;

	/* Need the struct lock for drm_gem_object_unreference(). */
	mutex_lock(&dev->struct_mutex);
	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference(&exec->bo[i]->base);
		kfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference(&bo->base.base);
	}
	mutex_unlock(&dev->struct_mutex);

	kfree(exec);
}
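
/* Retires every exec on job_done_list, dropping the BO references it
 * held, and schedules any seqno callbacks whose seqno has now been
 * passed by finished_seqno.
 */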
void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}
static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}
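
/* Registers @cb so that @func is called (from a workqueue) once @seqno
 * has been reached; if it has already passed, the work is scheduled
 * immediately.  A caller would typically embed the vc4_seqno_cb in its
 * own state and recover it with container_of() in the callback --
 * roughly like this (hypothetical caller, for illustration only):
 *
 *	struct my_flip_state {
 *		struct vc4_seqno_cb cb;
 *	};
 *
 *	static void my_flip_done(struct vc4_seqno_cb *cb)
 *	{
 *		struct my_flip_state *state =
 *			container_of(cb, struct my_flip_state, cb);
 *	}
 *
 *	vc4_queue_seqno_cb(dev, &state->cb, seqno, my_flip_done);
 */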
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}
/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}
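
/* Shared body of the WAIT_SEQNO and WAIT_BO ioctls.  On an interrupted
 * wait it subtracts the time already spent from the user's timeout_ns,
 * so that a restarted ioctl doesn't wait the full timeout over again.
 */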
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}
/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}
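
/* One-time GEM/job-queue setup at driver load: the job lists, their
 * lock, the hangcheck timer, and the work items used to reset the GPU
 * and retire completed jobs.
 */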
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
}
void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	vc4_bo_cache_destroy(dev);

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}