kfd_device_queue_manager.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

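/*
 * Allocate a VMID for a process when its first queue is created: take the
 * lowest free bit from dqm->vmid_bitmap, map the process PASID to that VMID,
 * program the SH_MEM registers and page table base for it, then flush the
 * TLB so the new mapping takes effect.
 */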
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	uint32_t len;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

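/*
 * Pick a hardware queue descriptor (HQD) slot for a no-HWS compute queue:
 * starting from dqm->next_pipe_to_allocate, scan the enabled pipes
 * round-robin and take the lowest free queue bit on the first pipe that has
 * one. Returns -EBUSY when every enabled pipe is fully allocated.
 */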
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid asynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

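/*
 * Update a queue's MQD from its (possibly changed) properties. The queue has
 * to be off the hardware while the MQD is rewritten: under HWS the current
 * runlist is unmapped first, while in no-HWS mode an active queue is
 * destroyed on its HQD and reloaded afterwards.
 */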
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
				       &q->properties, q->process->mm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

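/*
 * Evict all queues of a process in no-HWS mode. qpd->evicted is a reference
 * count, so only the first eviction actually deactivates the queues; each
 * active queue is taken off its HQD with destroy_mqd() and marked evicted so
 * a later restore knows which queues to bring back.
 */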
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* unactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* unactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

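/*
 * Restore a process's evicted queues in no-HWS mode. The restore only happens
 * once the qpd->evicted reference count drops back to zero; the page table
 * base is re-read from the process VM before the evicted queues are reloaded
 * onto their HQDs and counted as active again.
 */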
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
				       q->queue, &q->properties,
				       q->process->mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

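/*
 * Register a process with the device queue manager: add its qpd to the
 * dqm->queues list, cache the process page directory base in the qpd, and
 * let the ASIC-specific update_qpd hook refresh the per-process settings.
 */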
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
			(uint32_t)pasid |
			ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);
	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
		q->properties.sdma_engine_id =
			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out_deallocate_sdma_queue;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return retval;

out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

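/*
 * Poll a fence value written by the CP until it reaches the expected value,
 * calling schedule() between reads so the CPU is not monopolized. Returns
 * -ETIME if the value is not observed within timeout_ms.
 */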
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

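/*
 * Unmap queues that are currently on the hardware: if any SDMA queues exist,
 * preempt both SDMA engines, then send an unmap-queues packet for the compute
 * queues matching the filter and wait for the CP to write the completion
 * fence before releasing the runlist IB.
 */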
/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}

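/*
 * Tear down everything a terminating process still owns under the CP
 * scheduler: its kernel (debug) queues, its user mode queues and its entry in
 * the dqm process list, then rebuild the runlist and reset wavefronts if a
 * preemption failed. MQD resources are freed only after the queues have been
 * unmapped from the hardware.
 */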
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

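/*
 * Create and initialize a device queue manager for a KFD device. The
 * scheduling policy is forced to NO_HWS on ASICs where the hardware scheduler
 * cannot be used (see the Hawaii/Tonga cases below); the ops table is then
 * wired up for either CP (HWS) scheduling or direct no-HWS scheduling, and
 * asic_ops is selected by ASIC family.
 */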
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

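/*
 * debugfs helper: dump the HQD registers of every queue slot enabled in the
 * shared queue bitmap, followed by the SDMA RLC registers of each SDMA
 * engine/queue pair.
 */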
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

#endif