/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}

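/**
 * radeon_cs_parser_relocs() - build and validate the relocation/BO list
 * @p: parser structure holding parsing context.
 *
 * Looks up every GEM object referenced by the relocation chunk, assigns it
 * to a priority bucket, and hands the resulting list to TTM for validation.
 */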
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i, j;
        bool duplicate;

        if (p->chunk_relocs_idx == -1) {
                return 0;
        }
        chunk = &p->chunks[p->chunk_relocs_idx];
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each relocs use 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
        if (p->relocs_ptr == NULL) {
                return -ENOMEM;
        }
        p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                unsigned priority;

                duplicate = false;
                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                for (j = 0; j < i; j++) {
                        if (r->handle == p->relocs[j].handle) {
                                p->relocs_ptr[i] = &p->relocs[j];
                                duplicate = true;
                                break;
                        }
                }
                if (duplicate) {
                        p->relocs[i].handle = 0;
                        continue;
                }

                p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
                                                          r->handle);
                if (p->relocs[i].gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs_ptr[i] = &p->relocs[i];
                p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & 0xf) * 2 + !!r->write_domain;
                /* The first reloc of an UVD job is the msg and that must be
                 * in VRAM. Also put everything into VRAM on AGP cards to
                 * avoid image corruption.
                 */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);

        return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
}

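/**
 * radeon_cs_get_ring() - map a userspace ring id and priority to a hw ring
 * @p: parser structure holding parsing context.
 * @ring: ring id requested by userspace (RADEON_CS_RING_*)
 * @priority: requested submission priority
 *
 * Selects the hardware ring index for the submission, falling back to the
 * GFX ring or rejecting the request depending on the chip family.
 */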
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

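/**
 * radeon_cs_sync_rings() - make the IB wait for the relocated BOs
 * @p: parser structure holding parsing context.
 *
 * Adds the sync object of every BO in the relocation list to the IB's
 * semaphore, so the command stream waits for prior work on those buffers.
 */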
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        int i;

        for (i = 0; i < p->nrelocs; i++) {
                if (!p->relocs[i].robj)
                        continue;

                radeon_semaphore_sync_to(p->ib.semaphore,
                                         p->relocs[i].robj->tbo.sync_obj);
        }
}

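/**
 * radeon_cs_parser_init() - copy in and sanity check the CS chunks
 * @p: parser structure holding parsing context.
 * @data: pointer to the drm_radeon_cs ioctl argument
 *
 * Copies the chunk array from userspace, records the IB, reloc, flags and
 * const IB chunk indices, and validates the requested ring and VM flags.
 */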
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        if (!cs->num_chunks) {
                return 0;
        }

        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->ib.semaphore = NULL;
        p->const_ib.sa_bo = NULL;
        p->const_ib.semaphore = NULL;
        p->chunk_ib_idx = -1;
        p->chunk_relocs_idx = -1;
        p->chunk_flags_idx = -1;
        p->chunk_const_ib_idx = -1;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                           sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs_idx = i;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib_idx = i;
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib_idx = i;
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags_idx = i;
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

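/* list_sort() comparison callback: order BOs by size, smallest first. */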
static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
        struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to back off the reservation
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->ib.fence);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        if (parser->relocs[i].gobj)
                                drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
                }
        }
        kfree(parser->track);
        kfree(parser->relocs);
        kfree(parser->relocs_ptr);
        kfree(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

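/**
 * radeon_cs_ib_chunk() - check and submit the IB for the non-VM path
 * @rdev: radeon device
 * @parser: parser structure holding parsing context.
 *
 * Runs the per-ring command stream checker on the IB, syncs against the
 * relocated BOs and schedules the IB when the submission does not use a VM.
 */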
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        radeon_cs_sync_rings(parser);
        r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

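/**
 * radeon_bo_vm_update_pte() - update the VM page tables for this submission
 * @p: parser structure holding parsing context.
 * @vm: the VM the command stream will execute in
 *
 * Updates the page directory, the temporary ring BO mapping and the page
 * table entries of every BO referenced by the relocation list.
 */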
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                /* ignore duplicates */
                if (p->relocs_ptr[i] != &p->relocs[i])
                        continue;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;
        }
        return 0;
}

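/**
 * radeon_cs_ib_vm_chunk() - check and submit the IB for the VM path
 * @rdev: radeon device
 * @parser: parser structure holding parsing context.
 *
 * Validates the (const) IB with the per-ring ib_parse callback, then updates
 * the VM page tables under the VM mutex and schedules the IB.
 */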
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }
        radeon_cs_sync_rings(parser);
        radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

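/**
 * radeon_cs_handle_lockup() - convert a deadlock into a GPU reset and retry
 * @rdev: radeon device
 * @r: error code returned by the submission path
 *
 * On -EDEADLK the GPU is reset and, if that succeeds, -EAGAIN is returned so
 * the submission can be retried; any other error is passed through.
 */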
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

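/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy the command stream in
 * @rdev: radeon device
 * @parser: parser structure holding parsing context.
 *
 * Allocates the IB (and the const IB on SI+ VM submissions), checks the
 * size limits and copies the command dwords from the user chunks.
 */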
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib_idx != -1)) {
                        ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                           ib_chunk->user_ptr,
                                           ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = &parser->chunks[parser->chunk_ib_idx];
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = &parser->chunks[parser->chunk_ib_idx];

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

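/**
 * radeon_cs_ioctl() - main entry point for command submission from userspace
 * @dev: drm device
 * @data: pointer to the drm_radeon_cs ioctl argument
 * @filp: file private of the caller
 *
 * Initializes the parser, validates the buffer list, then runs the non-VM
 * and VM submission paths before cleaning up and handling GPU lockups.
 */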
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        trace_radeon_cs(&parser);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: index of the packet header in the IB
 *
 * Assumes that chunk_ib_index is properly set. Returns -EINVAL if the packet
 * is bigger than the remaining ib size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_device *rdev = p->rdev;
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the reloc information
 * @nomm: no memory management (legacy UMS path); compute the GPU offset
 *        directly from the relocation chunk
 *
 * Check if the next packet is a relocation packet3 and return the
 * corresponding reloc entry.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_cs_reloc **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs_idx == -1) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = p->relocs_ptr[(idx / 4)];
        return 0;
}