kfd_mqd_manager_v9.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /*
  2. * Copyright 2016-2018 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. */
  23. #include <linux/printk.h>
  24. #include <linux/slab.h>
  25. #include <linux/uaccess.h>
  26. #include "kfd_priv.h"
  27. #include "kfd_mqd_manager.h"
  28. #include "v9_structs.h"
  29. #include "gc/gc_9_0_offset.h"
  30. #include "gc/gc_9_0_sh_mask.h"
  31. #include "sdma0/sdma0_4_0_sh_mask.h"
  32. static inline struct v9_mqd *get_mqd(void *mqd)
  33. {
  34. return (struct v9_mqd *)mqd;
  35. }
  36. static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
  37. {
  38. return (struct v9_sdma_mqd *)mqd;
  39. }
  40. static void update_cu_mask(struct mqd_manager *mm, void *mqd,
  41. struct queue_properties *q)
  42. {
  43. struct v9_mqd *m;
  44. uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
  45. if (q->cu_mask_count == 0)
  46. return;
  47. mqd_symmetrically_map_cu_mask(mm,
  48. q->cu_mask, q->cu_mask_count, se_mask);
  49. m = get_mqd(mqd);
  50. m->compute_static_thread_mgmt_se0 = se_mask[0];
  51. m->compute_static_thread_mgmt_se1 = se_mask[1];
  52. m->compute_static_thread_mgmt_se2 = se_mask[2];
  53. m->compute_static_thread_mgmt_se3 = se_mask[3];
  54. pr_debug("update cu mask to %#x %#x %#x %#x\n",
  55. m->compute_static_thread_mgmt_se0,
  56. m->compute_static_thread_mgmt_se1,
  57. m->compute_static_thread_mgmt_se2,
  58. m->compute_static_thread_mgmt_se3);
  59. }
  60. static int init_mqd(struct mqd_manager *mm, void **mqd,
  61. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  62. struct queue_properties *q)
  63. {
  64. int retval;
  65. uint64_t addr;
  66. struct v9_mqd *m;
  67. struct kfd_dev *kfd = mm->dev;
  68. /* From V9, for CWSR, the control stack is located on the next page
  69. * boundary after the mqd, we will use the gtt allocation function
  70. * instead of sub-allocation function.
  71. */
  72. if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
  73. *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
  74. if (!*mqd_mem_obj)
  75. return -ENOMEM;
  76. retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
  77. ALIGN(q->ctl_stack_size, PAGE_SIZE) +
  78. ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
  79. &((*mqd_mem_obj)->gtt_mem),
  80. &((*mqd_mem_obj)->gpu_addr),
  81. (void *)&((*mqd_mem_obj)->cpu_ptr), true);
  82. } else
  83. retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
  84. mqd_mem_obj);
  85. if (retval != 0)
  86. return -ENOMEM;
  87. m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
  88. addr = (*mqd_mem_obj)->gpu_addr;
  89. memset(m, 0, sizeof(struct v9_mqd));
  90. m->header = 0xC0310800;
  91. m->compute_pipelinestat_enable = 1;
  92. m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
  93. m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
  94. m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
  95. m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
  96. m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
  97. 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
  98. m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
  99. m->cp_mqd_base_addr_lo = lower_32_bits(addr);
  100. m->cp_mqd_base_addr_hi = upper_32_bits(addr);
  101. m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
  102. 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
  103. 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
  104. m->cp_hqd_pipe_priority = 1;
  105. m->cp_hqd_queue_priority = 15;
  106. if (q->format == KFD_QUEUE_FORMAT_AQL) {
  107. m->cp_hqd_aql_control =
  108. 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
  109. }
  110. if (q->tba_addr) {
  111. m->compute_pgm_rsrc2 |=
  112. (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
  113. }
  114. if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
  115. m->cp_hqd_persistent_state |=
  116. (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
  117. m->cp_hqd_ctx_save_base_addr_lo =
  118. lower_32_bits(q->ctx_save_restore_area_address);
  119. m->cp_hqd_ctx_save_base_addr_hi =
  120. upper_32_bits(q->ctx_save_restore_area_address);
  121. m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
  122. m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
  123. m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
  124. m->cp_hqd_wg_state_offset = q->ctl_stack_size;
  125. }
  126. *mqd = m;
  127. if (gart_addr)
  128. *gart_addr = addr;
  129. retval = mm->update_mqd(mm, m, q);
  130. return retval;
  131. }
  132. static int load_mqd(struct mqd_manager *mm, void *mqd,
  133. uint32_t pipe_id, uint32_t queue_id,
  134. struct queue_properties *p, struct mm_struct *mms)
  135. {
  136. /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
  137. uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
  138. return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
  139. (uint32_t __user *)p->write_ptr,
  140. wptr_shift, 0, mms);
  141. }
  142. static int update_mqd(struct mqd_manager *mm, void *mqd,
  143. struct queue_properties *q)
  144. {
  145. struct v9_mqd *m;
  146. m = get_mqd(mqd);
  147. m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
  148. m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
  149. pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
  150. m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
  151. m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
  152. m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
  153. m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
  154. m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
  155. m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
  156. m->cp_hqd_pq_doorbell_control =
  157. q->doorbell_off <<
  158. CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
  159. pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
  160. m->cp_hqd_pq_doorbell_control);
  161. m->cp_hqd_ib_control =
  162. 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
  163. 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
  164. /*
  165. * HW does not clamp this field correctly. Maximum EOP queue size
  166. * is constrained by per-SE EOP done signal count, which is 8-bit.
  167. * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
  168. * more than (EOP entry count - 1) so a queue size of 0x800 dwords
  169. * is safe, giving a maximum field value of 0xA.
  170. */
  171. m->cp_hqd_eop_control = min(0xA,
  172. order_base_2(q->eop_ring_buffer_size / 4) - 1);
  173. m->cp_hqd_eop_base_addr_lo =
  174. lower_32_bits(q->eop_ring_buffer_address >> 8);
  175. m->cp_hqd_eop_base_addr_hi =
  176. upper_32_bits(q->eop_ring_buffer_address >> 8);
  177. m->cp_hqd_iq_timer = 0;
  178. m->cp_hqd_vmid = q->vmid;
  179. if (q->format == KFD_QUEUE_FORMAT_AQL) {
  180. m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
  181. 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
  182. 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
  183. 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
  184. m->cp_hqd_pq_doorbell_control |= 1 <<
  185. CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
  186. }
  187. if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
  188. m->cp_hqd_ctx_save_control = 0;
  189. update_cu_mask(mm, mqd, q);
  190. q->is_active = (q->queue_size > 0 &&
  191. q->queue_address != 0 &&
  192. q->queue_percent > 0 &&
  193. !q->is_evicted);
  194. return 0;
  195. }
  196. static int destroy_mqd(struct mqd_manager *mm, void *mqd,
  197. enum kfd_preempt_type type,
  198. unsigned int timeout, uint32_t pipe_id,
  199. uint32_t queue_id)
  200. {
  201. return mm->dev->kfd2kgd->hqd_destroy
  202. (mm->dev->kgd, mqd, type, timeout,
  203. pipe_id, queue_id);
  204. }
  205. static void uninit_mqd(struct mqd_manager *mm, void *mqd,
  206. struct kfd_mem_obj *mqd_mem_obj)
  207. {
  208. struct kfd_dev *kfd = mm->dev;
  209. if (mqd_mem_obj->gtt_mem) {
  210. kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
  211. kfree(mqd_mem_obj);
  212. } else {
  213. kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
  214. }
  215. }
  216. static bool is_occupied(struct mqd_manager *mm, void *mqd,
  217. uint64_t queue_address, uint32_t pipe_id,
  218. uint32_t queue_id)
  219. {
  220. return mm->dev->kfd2kgd->hqd_is_occupied(
  221. mm->dev->kgd, queue_address,
  222. pipe_id, queue_id);
  223. }
  224. static int get_wave_state(struct mqd_manager *mm, void *mqd,
  225. void __user *ctl_stack,
  226. u32 *ctl_stack_used_size,
  227. u32 *save_area_used_size)
  228. {
  229. struct v9_mqd *m;
  230. /* Control stack is located one page after MQD. */
  231. void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
  232. m = get_mqd(mqd);
  233. *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
  234. m->cp_hqd_cntl_stack_offset;
  235. *save_area_used_size = m->cp_hqd_wg_state_offset;
  236. if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
  237. return -EFAULT;
  238. return 0;
  239. }
  240. static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
  241. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  242. struct queue_properties *q)
  243. {
  244. struct v9_mqd *m;
  245. int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
  246. if (retval != 0)
  247. return retval;
  248. m = get_mqd(*mqd);
  249. m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
  250. 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
  251. return retval;
  252. }
  253. static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
  254. struct queue_properties *q)
  255. {
  256. struct v9_mqd *m;
  257. int retval = update_mqd(mm, mqd, q);
  258. if (retval != 0)
  259. return retval;
  260. /* TODO: what's the point? update_mqd already does this. */
  261. m = get_mqd(mqd);
  262. m->cp_hqd_vmid = q->vmid;
  263. return retval;
  264. }
  265. static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
  266. struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
  267. struct queue_properties *q)
  268. {
  269. int retval;
  270. struct v9_sdma_mqd *m;
  271. retval = kfd_gtt_sa_allocate(mm->dev,
  272. sizeof(struct v9_sdma_mqd),
  273. mqd_mem_obj);
  274. if (retval != 0)
  275. return -ENOMEM;
  276. m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
  277. memset(m, 0, sizeof(struct v9_sdma_mqd));
  278. *mqd = m;
  279. if (gart_addr)
  280. *gart_addr = (*mqd_mem_obj)->gpu_addr;
  281. retval = mm->update_mqd(mm, m, q);
  282. return retval;
  283. }
  284. static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
  285. struct kfd_mem_obj *mqd_mem_obj)
  286. {
  287. kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
  288. }
  289. static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
  290. uint32_t pipe_id, uint32_t queue_id,
  291. struct queue_properties *p, struct mm_struct *mms)
  292. {
  293. return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
  294. (uint32_t __user *)p->write_ptr,
  295. mms);
  296. }
  297. #define SDMA_RLC_DUMMY_DEFAULT 0xf
  298. static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
  299. struct queue_properties *q)
  300. {
  301. struct v9_sdma_mqd *m;
  302. m = get_sdma_mqd(mqd);
  303. m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
  304. << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
  305. q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
  306. 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
  307. 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
  308. m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
  309. m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
  310. m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
  311. m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
  312. m->sdmax_rlcx_doorbell_offset =
  313. q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
  314. m->sdma_engine_id = q->sdma_engine_id;
  315. m->sdma_queue_id = q->sdma_queue_id;
  316. m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
  317. q->is_active = (q->queue_size > 0 &&
  318. q->queue_address != 0 &&
  319. q->queue_percent > 0 &&
  320. !q->is_evicted);
  321. return 0;
  322. }
  323. /*
  324. * * preempt type here is ignored because there is only one way
  325. * * to preempt sdma queue
  326. */
  327. static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
  328. enum kfd_preempt_type type,
  329. unsigned int timeout, uint32_t pipe_id,
  330. uint32_t queue_id)
  331. {
  332. return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
  333. }
  334. static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
  335. uint64_t queue_address, uint32_t pipe_id,
  336. uint32_t queue_id)
  337. {
  338. return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
  339. }
  340. #if defined(CONFIG_DEBUG_FS)
  341. static int debugfs_show_mqd(struct seq_file *m, void *data)
  342. {
  343. seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
  344. data, sizeof(struct v9_mqd), false);
  345. return 0;
  346. }
  347. static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
  348. {
  349. seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
  350. data, sizeof(struct v9_sdma_mqd), false);
  351. return 0;
  352. }
  353. #endif
  354. struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
  355. struct kfd_dev *dev)
  356. {
  357. struct mqd_manager *mqd;
  358. if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
  359. return NULL;
  360. mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
  361. if (!mqd)
  362. return NULL;
  363. mqd->dev = dev;
  364. switch (type) {
  365. case KFD_MQD_TYPE_CP:
  366. case KFD_MQD_TYPE_COMPUTE:
  367. mqd->init_mqd = init_mqd;
  368. mqd->uninit_mqd = uninit_mqd;
  369. mqd->load_mqd = load_mqd;
  370. mqd->update_mqd = update_mqd;
  371. mqd->destroy_mqd = destroy_mqd;
  372. mqd->is_occupied = is_occupied;
  373. mqd->get_wave_state = get_wave_state;
  374. #if defined(CONFIG_DEBUG_FS)
  375. mqd->debugfs_show_mqd = debugfs_show_mqd;
  376. #endif
  377. break;
  378. case KFD_MQD_TYPE_HIQ:
  379. mqd->init_mqd = init_mqd_hiq;
  380. mqd->uninit_mqd = uninit_mqd;
  381. mqd->load_mqd = load_mqd;
  382. mqd->update_mqd = update_mqd_hiq;
  383. mqd->destroy_mqd = destroy_mqd;
  384. mqd->is_occupied = is_occupied;
  385. #if defined(CONFIG_DEBUG_FS)
  386. mqd->debugfs_show_mqd = debugfs_show_mqd;
  387. #endif
  388. break;
  389. case KFD_MQD_TYPE_SDMA:
  390. mqd->init_mqd = init_mqd_sdma;
  391. mqd->uninit_mqd = uninit_mqd_sdma;
  392. mqd->load_mqd = load_mqd_sdma;
  393. mqd->update_mqd = update_mqd_sdma;
  394. mqd->destroy_mqd = destroy_mqd_sdma;
  395. mqd->is_occupied = is_occupied_sdma;
  396. #if defined(CONFIG_DEBUG_FS)
  397. mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
  398. #endif
  399. break;
  400. default:
  401. kfree(mqd);
  402. return NULL;
  403. }
  404. return mqd;
  405. }