kfd_device_queue_manager.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
					unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);

	return dqm->dev->shared_resources.first_compute_pipe;
}

unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);

	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}
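
/*
 * allocate_vmid() - grab the first free bit in dqm->vmid_bitmap, translate it
 * to a hardware VMID, map the process PASID to that VMID and program the
 * process's SH_MEM settings for it. Called only when the process creates its
 * first queue on this device.
 */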
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* On Kaveri, KFD VMIDs start at VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}
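
/*
 * create_queue_nocpsch() - queue creation path used when the CP hardware
 * scheduler is disabled. The first queue of a process also allocates a VMID
 * for that process; the new queue is then loaded directly onto an HQD
 * (compute) or an SDMA engine slot.
 */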
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}
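
/*
 * allocate_hqd() - pick a free hardware queue descriptor slot for @q.
 * Pipes are scanned round-robin starting at dqm->next_pipe_to_allocate so
 * that queues spread horizontally across the compute pipes.
 */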
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (set == false)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe,
			q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In Func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
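
/*
 * update_queue() - push updated queue properties into the MQD and adjust
 * dqm->queue_count when the queue's active state changes. Under the HWS
 * policies the runlist is re-issued so the CP picks up the change.
 */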
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active == true)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active == true) && (prev_active == false))
		dqm->queue_count++;
	else if ((q->properties.is_active == false) && (prev_active == true))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("In func %s\n", __func__);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
			(uint32_t)pasid |
			ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}
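
/*
 * init_pipelines() - allocate one CIK_HPD_EOP_BYTES-sized HPD/EOP buffer per
 * compute pipe in GTT, zero it, and point each pipe at its slice through the
 * kfd2kgd init_pipeline() callback.
 */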
int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */
	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
					&dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL) {
		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		/*
		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
		 * space in GTT for pipelines we don't initialize
		 */
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}
static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);
	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}
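
/*
 * SDMA queue IDs are handed out from dqm->sdma_bitmap; the ID is later split
 * into an engine/queue pair in create_sdma_queue_nocpsch().
 */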
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}
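
/*
 * init_sdma_vm() - build the SDMA virtual-memory configuration word for a
 * queue: ATC is always enabled, and the shared base comes from either the
 * 32-bit or the 64-bit aperture depending on the owning process.
 */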
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
				struct qcm_process_device *qpd)
{
	uint32_t value = SDMA_ATC;

	if (q->process->is_32bit_user_mode)
		value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
	else
		value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
							qpd_to_pdd(qpd)));
	q->properties.sdma_vm_addr = value;
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
	pr_debug("    sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("    sdma engine id: %d\n", q->properties.sdma_engine_id);

	init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0,
				0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */
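
/*
 * set_sched_resources() - tell the CP scheduler firmware which VMIDs and HQD
 * slots it may use, via a SET_RESOURCES packet built by the packet manager.
 */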
static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"    vmid mask: 0x%8X\n"
			"    queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	destroy_queues_cpsch(dqm, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	init_sdma_vm(dqm, q, qpd);

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
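
/*
 * amdkfd_fence_wait_timeout() - poll (yielding via schedule()) until the
 * fence memory written by the CP reaches @fence_value, or the jiffies-based
 * @timeout expires.
 */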
static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
			sdma_engine);
}
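
/*
 * destroy_queues_cpsch() - preempt every queue in the active runlist: unmap
 * the SDMA and compute queues, then wait on the GART fence for the CP to
 * acknowledge the preemption before releasing the runlist IB.
 */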
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;
	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* wait for the fence, giving up after the preemption timeout */
	amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}
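
/*
 * execute_queues_cpsch() - rebuild and submit the runlist: preempt whatever
 * is currently running, then send a new runlist IB if there are active
 * queues and registered processes and no runlist is already active.
 */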
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */
		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}
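
/*
 * device_queue_manager_init() - allocate a DQM and wire up its ops table
 * according to the scheduling policy (CP hardware scheduling vs. no HWS),
 * plus the ASIC-specific ops for Kaveri (CIK) or Carrizo (VI).
 */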
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}