kfd_mqd_manager_vi.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include <linux/printk.h>
  24. #include <linux/slab.h>
  25. #include <linux/mm_types.h>
  26. #include "kfd_priv.h"
  27. #include "kfd_mqd_manager.h"
  28. #include "vi_structs.h"
  29. #include "gca/gfx_8_0_sh_mask.h"
  30. #include "gca/gfx_8_0_enum.h"
  31. #include "oss/oss_3_0_sh_mask.h"
  /*
   * Bit position of the PRIV_STATE field inside the cp_mqd_control word;
   * init_mqd() shifts a 1 into place with it.
   * NOTE(review): defined locally, presumably because the included register
   * headers do not provide it -- confirm.
   */
  #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
  /*
   * Reinterpret an opaque MQD pointer as a VI compute MQD.
   * Pure pointer conversion: nothing is validated or dereferenced.
   */
  static inline struct vi_mqd *get_mqd(void *mqd)
  {
      struct vi_mqd *vi = mqd;

      return vi;
  }
  /*
   * Reinterpret an opaque MQD pointer as a VI SDMA MQD.
   * Pure pointer conversion: nothing is validated or dereferenced.
   */
  static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
  {
      struct vi_sdma_mqd *sdma = mqd;

      return sdma;
  }
  41. static void update_cu_mask(struct mqd_manager *mm, void *mqd,
  42. struct queue_properties *q)
  43. {
  44. struct vi_mqd *m;
  45. uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
  46. if (q->cu_mask_count == 0)
  47. return;
  48. mqd_symmetrically_map_cu_mask(mm,
  49. q->cu_mask, q->cu_mask_count, se_mask);
  50. m = get_mqd(mqd);
  51. m->compute_static_thread_mgmt_se0 = se_mask[0];
  52. m->compute_static_thread_mgmt_se1 = se_mask[1];
  53. m->compute_static_thread_mgmt_se2 = se_mask[2];
  54. m->compute_static_thread_mgmt_se3 = se_mask[3];
  55. pr_debug("Update cu mask to %#x %#x %#x %#x\n",
  56. m->compute_static_thread_mgmt_se0,
  57. m->compute_static_thread_mgmt_se1,
  58. m->compute_static_thread_mgmt_se2,
  59. m->compute_static_thread_mgmt_se3);
  60. }
  61. static int init_mqd(struct mqd_manager *mm, void **mqd,
  62. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  63. struct queue_properties *q)
  64. {
  65. int retval;
  66. uint64_t addr;
  67. struct vi_mqd *m;
  68. retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
  69. mqd_mem_obj);
  70. if (retval != 0)
  71. return -ENOMEM;
  72. m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
  73. addr = (*mqd_mem_obj)->gpu_addr;
  74. memset(m, 0, sizeof(struct vi_mqd));
  75. m->header = 0xC0310800;
  76. m->compute_pipelinestat_enable = 1;
  77. m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
  78. m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
  79. m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
  80. m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
  81. m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
  82. 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
  83. m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
  84. MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
  85. m->cp_mqd_base_addr_lo = lower_32_bits(addr);
  86. m->cp_mqd_base_addr_hi = upper_32_bits(addr);
  87. m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
  88. 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
  89. 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
  90. m->cp_hqd_pipe_priority = 1;
  91. m->cp_hqd_queue_priority = 15;
  92. m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
  93. if (q->format == KFD_QUEUE_FORMAT_AQL)
  94. m->cp_hqd_iq_rptr = 1;
  95. if (q->tba_addr) {
  96. m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
  97. m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
  98. m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
  99. m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
  100. m->compute_pgm_rsrc2 |=
  101. (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
  102. }
  103. if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
  104. m->cp_hqd_persistent_state |=
  105. (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
  106. m->cp_hqd_ctx_save_base_addr_lo =
  107. lower_32_bits(q->ctx_save_restore_area_address);
  108. m->cp_hqd_ctx_save_base_addr_hi =
  109. upper_32_bits(q->ctx_save_restore_area_address);
  110. m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
  111. m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
  112. m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
  113. m->cp_hqd_wg_state_offset = q->ctl_stack_size;
  114. }
  115. *mqd = m;
  116. if (gart_addr)
  117. *gart_addr = addr;
  118. retval = mm->update_mqd(mm, m, q);
  119. return retval;
  120. }
  121. static int load_mqd(struct mqd_manager *mm, void *mqd,
  122. uint32_t pipe_id, uint32_t queue_id,
  123. struct queue_properties *p, struct mm_struct *mms)
  124. {
  125. /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
  126. uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
  127. uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
  128. return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
  129. (uint32_t __user *)p->write_ptr,
  130. wptr_shift, wptr_mask, mms);
  131. }
  132. static int __update_mqd(struct mqd_manager *mm, void *mqd,
  133. struct queue_properties *q, unsigned int mtype,
  134. unsigned int atc_bit)
  135. {
  136. struct vi_mqd *m;
  137. m = get_mqd(mqd);
  138. m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
  139. atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
  140. mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
  141. m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
  142. pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
  143. m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
  144. m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
  145. m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
  146. m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
  147. m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
  148. m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
  149. m->cp_hqd_pq_doorbell_control =
  150. q->doorbell_off <<
  151. CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
  152. pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
  153. m->cp_hqd_pq_doorbell_control);
  154. m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
  155. mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
  156. m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
  157. 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
  158. mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
  159. /*
  160. * HW does not clamp this field correctly. Maximum EOP queue size
  161. * is constrained by per-SE EOP done signal count, which is 8-bit.
  162. * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
  163. * more than (EOP entry count - 1) so a queue size of 0x800 dwords
  164. * is safe, giving a maximum field value of 0xA.
  165. */
  166. m->cp_hqd_eop_control |= min(0xA,
  167. order_base_2(q->eop_ring_buffer_size / 4) - 1);
  168. m->cp_hqd_eop_base_addr_lo =
  169. lower_32_bits(q->eop_ring_buffer_address >> 8);
  170. m->cp_hqd_eop_base_addr_hi =
  171. upper_32_bits(q->eop_ring_buffer_address >> 8);
  172. m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
  173. mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
  174. m->cp_hqd_vmid = q->vmid;
  175. if (q->format == KFD_QUEUE_FORMAT_AQL) {
  176. m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
  177. 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
  178. }
  179. if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
  180. m->cp_hqd_ctx_save_control =
  181. atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
  182. mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
  183. update_cu_mask(mm, mqd, q);
  184. q->is_active = (q->queue_size > 0 &&
  185. q->queue_address != 0 &&
  186. q->queue_percent > 0 &&
  187. !q->is_evicted);
  188. return 0;
  189. }
  190. static int update_mqd(struct mqd_manager *mm, void *mqd,
  191. struct queue_properties *q)
  192. {
  193. return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
  194. }
  195. static int update_mqd_tonga(struct mqd_manager *mm, void *mqd,
  196. struct queue_properties *q)
  197. {
  198. return __update_mqd(mm, mqd, q, MTYPE_UC, 0);
  199. }
  200. static int destroy_mqd(struct mqd_manager *mm, void *mqd,
  201. enum kfd_preempt_type type,
  202. unsigned int timeout, uint32_t pipe_id,
  203. uint32_t queue_id)
  204. {
  205. return mm->dev->kfd2kgd->hqd_destroy
  206. (mm->dev->kgd, mqd, type, timeout,
  207. pipe_id, queue_id);
  208. }
  209. static void uninit_mqd(struct mqd_manager *mm, void *mqd,
  210. struct kfd_mem_obj *mqd_mem_obj)
  211. {
  212. kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
  213. }
  214. static bool is_occupied(struct mqd_manager *mm, void *mqd,
  215. uint64_t queue_address, uint32_t pipe_id,
  216. uint32_t queue_id)
  217. {
  218. return mm->dev->kfd2kgd->hqd_is_occupied(
  219. mm->dev->kgd, queue_address,
  220. pipe_id, queue_id);
  221. }
  222. static int get_wave_state(struct mqd_manager *mm, void *mqd,
  223. void __user *ctl_stack,
  224. u32 *ctl_stack_used_size,
  225. u32 *save_area_used_size)
  226. {
  227. struct vi_mqd *m;
  228. m = get_mqd(mqd);
  229. *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
  230. m->cp_hqd_cntl_stack_offset;
  231. *save_area_used_size = m->cp_hqd_wg_state_offset -
  232. m->cp_hqd_cntl_stack_size;
  233. /* Control stack is not copied to user mode for GFXv8 because
  234. * it's part of the context save area that is already
  235. * accessible to user mode
  236. */
  237. return 0;
  238. }
  239. static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
  240. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  241. struct queue_properties *q)
  242. {
  243. struct vi_mqd *m;
  244. int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
  245. if (retval != 0)
  246. return retval;
  247. m = get_mqd(*mqd);
  248. m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
  249. 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
  250. return retval;
  251. }
  252. static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
  253. struct queue_properties *q)
  254. {
  255. struct vi_mqd *m;
  256. int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
  257. if (retval != 0)
  258. return retval;
  259. m = get_mqd(mqd);
  260. m->cp_hqd_vmid = q->vmid;
  261. return retval;
  262. }
  263. static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
  264. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  265. struct queue_properties *q)
  266. {
  267. int retval;
  268. struct vi_sdma_mqd *m;
  269. retval = kfd_gtt_sa_allocate(mm->dev,
  270. sizeof(struct vi_sdma_mqd),
  271. mqd_mem_obj);
  272. if (retval != 0)
  273. return -ENOMEM;
  274. m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
  275. memset(m, 0, sizeof(struct vi_sdma_mqd));
  276. *mqd = m;
  277. if (gart_addr != NULL)
  278. *gart_addr = (*mqd_mem_obj)->gpu_addr;
  279. retval = mm->update_mqd(mm, m, q);
  280. return retval;
  281. }
  282. static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
  283. struct kfd_mem_obj *mqd_mem_obj)
  284. {
  285. kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
  286. }
  287. static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
  288. uint32_t pipe_id, uint32_t queue_id,
  289. struct queue_properties *p, struct mm_struct *mms)
  290. {
  291. return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
  292. (uint32_t __user *)p->write_ptr,
  293. mms);
  294. }
  295. static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
  296. struct queue_properties *q)
  297. {
  298. struct vi_sdma_mqd *m;
  299. m = get_sdma_mqd(mqd);
  300. m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
  301. << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
  302. q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
  303. 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
  304. 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
  305. m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
  306. m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
  307. m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
  308. m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
  309. m->sdmax_rlcx_doorbell =
  310. q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
  311. m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
  312. m->sdma_engine_id = q->sdma_engine_id;
  313. m->sdma_queue_id = q->sdma_queue_id;
  314. q->is_active = (q->queue_size > 0 &&
  315. q->queue_address != 0 &&
  316. q->queue_percent > 0 &&
  317. !q->is_evicted);
  318. return 0;
  319. }
  320. /*
  321. * * preempt type here is ignored because there is only one way
  322. * * to preempt sdma queue
  323. */
  324. static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
  325. enum kfd_preempt_type type,
  326. unsigned int timeout, uint32_t pipe_id,
  327. uint32_t queue_id)
  328. {
  329. return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
  330. }
  331. static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
  332. uint64_t queue_address, uint32_t pipe_id,
  333. uint32_t queue_id)
  334. {
  335. return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
  336. }
  #if defined(CONFIG_DEBUG_FS)

  /*
   * debugfs helper: hex-dump a compute MQD (sizeof(struct vi_mqd) bytes at
   * @data) into the seq_file, 32 bytes per row in 4-byte groups.
   * Always returns 0.
   */
  static int debugfs_show_mqd(struct seq_file *m, void *data)
  {
      const size_t len = sizeof(struct vi_mqd);

      seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, len, false);

      return 0;
  }

  /*
   * debugfs helper: hex-dump an SDMA MQD (sizeof(struct vi_sdma_mqd) bytes
   * at @data) into the seq_file, 32 bytes per row in 4-byte groups.
   * Always returns 0.
   */
  static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
  {
      const size_t len = sizeof(struct vi_sdma_mqd);

      seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data, len, false);

      return 0;
  }

  #endif
  351. struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
  352. struct kfd_dev *dev)
  353. {
  354. struct mqd_manager *mqd;
  355. if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
  356. return NULL;
  357. mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
  358. if (!mqd)
  359. return NULL;
  360. mqd->dev = dev;
  361. switch (type) {
  362. case KFD_MQD_TYPE_CP:
  363. case KFD_MQD_TYPE_COMPUTE:
  364. mqd->init_mqd = init_mqd;
  365. mqd->uninit_mqd = uninit_mqd;
  366. mqd->load_mqd = load_mqd;
  367. mqd->update_mqd = update_mqd;
  368. mqd->destroy_mqd = destroy_mqd;
  369. mqd->is_occupied = is_occupied;
  370. mqd->get_wave_state = get_wave_state;
  371. #if defined(CONFIG_DEBUG_FS)
  372. mqd->debugfs_show_mqd = debugfs_show_mqd;
  373. #endif
  374. break;
  375. case KFD_MQD_TYPE_HIQ:
  376. mqd->init_mqd = init_mqd_hiq;
  377. mqd->uninit_mqd = uninit_mqd;
  378. mqd->load_mqd = load_mqd;
  379. mqd->update_mqd = update_mqd_hiq;
  380. mqd->destroy_mqd = destroy_mqd;
  381. mqd->is_occupied = is_occupied;
  382. #if defined(CONFIG_DEBUG_FS)
  383. mqd->debugfs_show_mqd = debugfs_show_mqd;
  384. #endif
  385. break;
  386. case KFD_MQD_TYPE_SDMA:
  387. mqd->init_mqd = init_mqd_sdma;
  388. mqd->uninit_mqd = uninit_mqd_sdma;
  389. mqd->load_mqd = load_mqd_sdma;
  390. mqd->update_mqd = update_mqd_sdma;
  391. mqd->destroy_mqd = destroy_mqd_sdma;
  392. mqd->is_occupied = is_occupied_sdma;
  393. #if defined(CONFIG_DEBUG_FS)
  394. mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
  395. #endif
  396. break;
  397. default:
  398. kfree(mqd);
  399. return NULL;
  400. }
  401. return mqd;
  402. }
  403. struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
  404. struct kfd_dev *dev)
  405. {
  406. struct mqd_manager *mqd;
  407. mqd = mqd_manager_init_vi(type, dev);
  408. if (!mqd)
  409. return NULL;
  410. if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
  411. mqd->update_mqd = update_mqd_tonga;
  412. return mqd;
  413. }