kfd_process.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
                                          struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

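/* Allocate the module-wide work queues: kfd_process_wq for deferred
 * process cleanup and kfd_restore_wq, an ordered queue, for restoring
 * evicted processes one at a time.
 */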
int kfd_process_create_wq(void)
{
        if (!kfd_process_wq)
                kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
        if (!kfd_restore_wq)
                kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

        if (!kfd_process_wq || !kfd_restore_wq) {
                kfd_process_destroy_wq();
                return -ENOMEM;
        }

        return 0;
}

void kfd_process_destroy_wq(void)
{
        if (kfd_process_wq) {
                destroy_workqueue(kfd_process_wq);
                kfd_process_wq = NULL;
        }
        if (kfd_restore_wq) {
                destroy_workqueue(kfd_restore_wq);
                kfd_restore_wq = NULL;
        }
}

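/* Unmap a buffer allocated by kfd_process_alloc_gpuvm from the device
 * GPUVM and free it.
 */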
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
                                   struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;

        dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
        dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should only be called right after the process
 * is created, while kfd_processes_mutex is still held, to avoid
 * concurrency. Because of that exclusiveness, we do not need to
 * take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
                                   uint64_t gpu_va, uint32_t size,
                                   uint32_t flags, void **kptr)
{
        struct kfd_dev *kdev = pdd->dev;
        struct kgd_mem *mem = NULL;
        int handle;
        int err;

        err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
                                                 pdd->vm, &mem, NULL, flags);
        if (err)
                goto err_alloc_mem;

        err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
        if (err)
                goto err_map_mem;

        err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
        if (err) {
                pr_debug("Sync memory failed, wait interrupted by user signal\n");
                goto sync_memory_failed;
        }

        /* Create an obj handle so kfd_process_device_remove_obj_handle
         * will take care of the bo removal when the process finishes.
         * We do not need to take p->mutex, because the process is just
         * created and the ioctls have not had the chance to run.
         */
        handle = kfd_process_device_create_obj_handle(pdd, mem);
        if (handle < 0) {
                err = handle;
                goto free_gpuvm;
        }

        if (kptr) {
                err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
                                (struct kgd_mem *)mem, kptr, NULL);
                if (err) {
                        pr_debug("Map GTT BO to kernel failed\n");
                        goto free_obj_handle;
                }
        }

        return err;

free_obj_handle:
        kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
        kfd_process_free_gpuvm(mem, pdd);
        return err;

err_map_mem:
        kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
        *kptr = NULL;
        return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IBs to AMDGPU from the kernel. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = ALLOC_MEM_FLAGS_GTT |
                         ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
                         ALLOC_MEM_FLAGS_WRITABLE |
                         ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (qpd->ib_kaddr || !qpd->ib_base)
                return 0;

        /* ib_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
                                      &kaddr);
        if (ret)
                return ret;

        qpd->ib_kaddr = kaddr;

        return 0;
}

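/* Create a kfd_process for the calling task on open of /dev/kfd, or
 * return the existing one if another thread of the same process
 * already created it.
 */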
struct kfd_process *kfd_create_process(struct file *filep)
{
        struct kfd_process *process;
        struct task_struct *thread = current;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        /*
         * Take the kfd processes mutex before starting process creation
         * so there won't be a case where two threads of the same process
         * create two kfd_process structures.
         */
        mutex_lock(&kfd_processes_mutex);

        /* A prior open of /dev/kfd could have already created the process. */
        process = find_process(thread);
        if (process)
                pr_debug("Process already found\n");
        else
                process = create_process(thread, filep);

        mutex_unlock(&kfd_processes_mutex);

        return process;
}

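/* Look up the kfd_process of the given task without taking a
 * reference. Returns an error pointer if the task has no mm, does not
 * use the pthreads threading model, or has no KFD process yet.
 */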
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        if (!thread->mm)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        process = find_process(thread);
        if (!process)
                return ERR_PTR(-EINVAL);

        return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *process;

        hash_for_each_possible_rcu(kfd_processes_table, process,
                                   kfd_processes, (uintptr_t)mm)
                if (process->mm == mm)
                        return process;

        return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
        struct kfd_process *p;
        int idx;

        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

void kfd_unref_process(struct kfd_process *p)
{
        kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
        struct kfd_process *p = pdd->process;
        void *mem;
        int id;

        /*
         * Remove all handles from the idr and release the corresponding
         * local memory objects.
         */
        idr_for_each_entry(&pdd->alloc_idr, mem, id) {
                struct kfd_process_device *peer_pdd;

                list_for_each_entry(peer_pdd, &p->per_device_data,
                                    per_device_list) {
                        if (!peer_pdd->vm)
                                continue;
                        peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
                                peer_pdd->dev->kgd, mem, peer_pdd->vm);
                }

                pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
                kfd_process_device_remove_obj_handle(pdd, id);
        }
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                kfd_process_device_free_bos(pdd);
}

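/* Tear down all per-device data of a process: release or destroy the
 * per-device VMs, free CWSR pages allocated for APUs, the doorbell
 * bitmaps and the allocation IDRs.
 */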
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
        struct kfd_process_device *pdd, *temp;

        list_for_each_entry_safe(pdd, temp, &p->per_device_data,
                                 per_device_list) {
                pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
                         pdd->dev->id, p->pasid);

                if (pdd->drm_file) {
                        pdd->dev->kfd2kgd->release_process_vm(pdd->dev->kgd, pdd->vm);
                        fput(pdd->drm_file);
                } else if (pdd->vm)
                        pdd->dev->kfd2kgd->destroy_process_vm(
                                pdd->dev->kgd, pdd->vm);

                list_del(&pdd->per_device_list);

                if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
                        free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
                                   get_order(KFD_CWSR_TBA_TMA_SIZE));

                kfree(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);

                kfree(pdd);
        }
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
        struct kfd_process *p = container_of(work, struct kfd_process,
                                             release_work);

        kfd_iommu_unbind_process(p);

        kfd_process_free_outstanding_kfd_bos(p);

        kfd_process_destroy_pdds(p);
        dma_fence_put(p->ef);

        kfd_event_free_process(p);

        kfd_pasid_free(p->pasid);
        kfd_free_process_doorbells(p);

        mutex_destroy(&p->mutex);

        put_task_struct(p->lead_thread);

        kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
        struct kfd_process *p = container_of(ref, struct kfd_process, ref);

        INIT_WORK(&p->release_work, kfd_process_wq_release);
        queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
        struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

        kfd_unref_process(p);
}

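/* mmu_notifier release callback, invoked when the process address
 * space is being torn down. Removes the process from the hash table,
 * cancels eviction/restore work, unregisters any attached debugger,
 * destroys the queues and drops the notifier's process reference.
 */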
static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                         struct mm_struct *mm)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd = NULL;

        /*
         * The kfd_process structure cannot be freed because the
         * mmu_notifier srcu is read locked.
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        if (WARN_ON(p->mm != mm))
                return;

        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        cancel_delayed_work_sync(&p->eviction_work);
        cancel_delayed_work_sync(&p->restore_work);

        mutex_lock(&p->mutex);

        /* Iterate over all process device data structures and if the
         * pdd is in debug mode, force unregistration first so that the
         * queues can be destroyed afterwards.
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;

                mutex_lock(kfd_get_dbgmgr_mutex());
                if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
                        if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
                                kfd_dbgmgr_destroy(dev->dbgmgr);
                                dev->dbgmgr = NULL;
                        }
                }
                mutex_unlock(kfd_get_dbgmgr_mutex());
        }

        kfd_process_dequeue_from_all_devices(p);
        pqm_uninit(&p->pqm);

        /* Indicate to other users that MM is no longer valid */
        p->mm = NULL;

        mutex_unlock(&p->mutex);

        mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
        mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
};

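/* Set up CWSR (compute wave save/restore) trap handler memory for APU
 * devices by mmapping the per-device reserved memory region into the
 * process and copying the trap handler ISA into it.
 */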
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
        unsigned long offset;
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                struct kfd_dev *dev = pdd->dev;
                struct qcm_process_device *qpd = &pdd->qpd;

                if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
                        continue;

                offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
                        << PAGE_SHIFT;
                qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
                        KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
                        MAP_SHARED, offset);

                if (IS_ERR_VALUE(qpd->tba_addr)) {
                        int err = qpd->tba_addr;

                        pr_err("Failure to set tba address. error %d.\n", err);
                        qpd->tba_addr = 0;
                        qpd->cwsr_kaddr = NULL;
                        return err;
                }

                memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

                qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
                pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
        }

        return 0;
}

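/* Set up CWSR trap handler memory for dGPU devices by allocating GTT
 * memory at cwsr_base in the process GPUVM and copying the trap
 * handler ISA into it.
 */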
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;
        struct qcm_process_device *qpd = &pdd->qpd;
        uint32_t flags = ALLOC_MEM_FLAGS_GTT |
                ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
        void *kaddr;
        int ret;

        if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
                return 0;

        /* cwsr_base is only set for dGPU */
        ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
                                      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
        if (ret)
                return ret;

        qpd->cwsr_kaddr = kaddr;
        qpd->tba_addr = qpd->cwsr_base;

        memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

        qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
        pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
                 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

        return 0;
}

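/* Allocate and initialize a new kfd_process for the given task: PASID,
 * doorbells, MMU notifier, events, process queue manager, apertures,
 * eviction/restore work and CWSR setup. Called with kfd_processes_mutex
 * held.
 */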
static struct kfd_process *create_process(const struct task_struct *thread,
                                          struct file *filep)
{
        struct kfd_process *process;
        int err = -ENOMEM;

        process = kzalloc(sizeof(*process), GFP_KERNEL);
        if (!process)
                goto err_alloc_process;

        process->pasid = kfd_pasid_alloc();
        if (process->pasid == 0)
                goto err_alloc_pasid;

        if (kfd_alloc_process_doorbells(process) < 0)
                goto err_alloc_doorbells;

        kref_init(&process->ref);

        mutex_init(&process->mutex);

        process->mm = thread->mm;

        /* register notifier */
        process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
        err = mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
                goto err_mmu_notifier;

        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                     (uintptr_t)process->mm);

        process->lead_thread = thread->group_leader;
        get_task_struct(process->lead_thread);

        INIT_LIST_HEAD(&process->per_device_data);

        kfd_event_init_process(process);

        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;

        /* init process apertures */
        process->is_32bit_user_mode = in_compat_syscall();
        err = kfd_init_apertures(process);
        if (err != 0)
                goto err_init_apertures;

        INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
        INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
        process->last_restore_timestamp = get_jiffies_64();

        err = kfd_process_init_cwsr_apu(process, filep);
        if (err)
                goto err_init_cwsr;

        return process;

err_init_cwsr:
        kfd_process_free_outstanding_kfd_bos(process);
        kfd_process_destroy_pdds(process);
err_init_apertures:
        pqm_uninit(&process->pqm);
err_process_pqm_init:
        hash_del_rcu(&process->kfd_processes);
        synchronize_rcu();
        mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
        mutex_destroy(&process->mutex);
        kfd_free_process_doorbells(process);
err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
err_alloc_pasid:
        kfree(process);
err_alloc_process:
        return ERR_PTR(err);
}

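/* On SOC15 ASICs, allocate a per-process doorbell bitmap and mark the
 * doorbells reserved by the KGD as unavailable.
 */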
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
                                struct kfd_dev *dev)
{
        unsigned int i;

        if (!KFD_IS_SOC15(dev->device_info->asic_family))
                return 0;

        qpd->doorbell_bitmap =
                kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
                                     BITS_PER_BYTE), GFP_KERNEL);
        if (!qpd->doorbell_bitmap)
                return -ENOMEM;

        /* Mask out any reserved doorbells */
        for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
                if ((dev->shared_resources.reserved_doorbell_mask & i) ==
                    dev->shared_resources.reserved_doorbell_val) {
                        set_bit(i, qpd->doorbell_bitmap);
                        pr_debug("reserved doorbell 0x%03x\n", i);
                }

        return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                                                       struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                if (pdd->dev == dev)
                        return pdd;

        return NULL;
}

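/* Allocate and initialize the per-device data (pdd) of a process for
 * the given device and add it to the process's per-device list.
 */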
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                          struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
        if (!pdd)
                return NULL;

        if (init_doorbell_bitmap(&pdd->qpd, dev)) {
                pr_err("Failed to init doorbell for process\n");
                kfree(pdd);
                return NULL;
        }

        pdd->dev = dev;
        INIT_LIST_HEAD(&pdd->qpd.queues_list);
        INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
        pdd->qpd.dqm = dev->dqm;
        pdd->qpd.pqm = &p->pqm;
        pdd->qpd.evicted = 0;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;
        list_add(&pdd->per_device_list, &p->per_device_data);

        /* Init idr used for memory handle translation */
        idr_init(&pdd->alloc_idr);

        return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                               struct file *drm_file)
{
        struct kfd_process *p;
        struct kfd_dev *dev;
        int ret;

        if (pdd->vm)
                return drm_file ? -EBUSY : 0;

        p = pdd->process;
        dev = pdd->dev;

        if (drm_file)
                ret = dev->kfd2kgd->acquire_process_vm(
                        dev->kgd, drm_file, p->pasid,
                        &pdd->vm, &p->kgd_process_info, &p->ef);
        else
                ret = dev->kfd2kgd->create_process_vm(
                        dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
        if (ret) {
                pr_err("Failed to create process VM object\n");
                return ret;
        }

        ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
                goto err_reserve_ib_mem;
        ret = kfd_process_device_init_cwsr_dgpu(pdd);
        if (ret)
                goto err_init_cwsr;

        pdd->drm_file = drm_file;

        return 0;

err_init_cwsr:
err_reserve_ib_mem:
        kfd_process_device_free_bos(pdd);
        if (!drm_file)
                dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
        pdd->vm = NULL;

        return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                                                      struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int err;

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                return ERR_PTR(-ENOMEM);
        }

        err = kfd_iommu_bind_process_to_device(pdd);
        if (err)
                return ERR_PTR(err);

        err = kfd_process_device_init_vm(pdd, NULL);
        if (err)
                return ERR_PTR(err);

        return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
                                                struct kfd_process *p)
{
        return list_first_entry(&p->per_device_data,
                                struct kfd_process_device,
                                per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
                                                struct kfd_process *p,
                                                struct kfd_process_device *pdd)
{
        if (list_is_last(&pdd->per_device_list, &p->per_device_data))
                return NULL;
        return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
        return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
                                         void *mem)
{
        return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle < 0)
                return NULL;

        return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
                                          int handle)
{
        if (handle >= 0)
                idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
        struct kfd_process *p, *ret_p = NULL;
        unsigned int temp;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (p->pasid == pasid) {
                        kref_get(&p->ref);
                        ret_p = p;
                        break;
                }
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *p;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        p = find_process_by_mm(mm);
        if (p)
                kref_get(&p->ref);

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r = 0;
        unsigned int n_evicted = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
                                                            &pdd->qpd);
                if (r) {
                        pr_err("Failed to evict process queues\n");
                        goto fail;
                }
                n_evicted++;
        }

        return r;

fail:
        /* To keep state consistent, roll back partial eviction by
         * restoring queues
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                if (n_evicted == 0)
                        break;
                if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd))
                        pr_err("Failed to restore queues\n");

                n_evicted--;
        }

        return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int r, ret = 0;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd);
                if (r) {
                        pr_err("Failed to restore process queues\n");
                        if (!ret)
                                ret = r;
                }
        }

        return ret;
}

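/* Delayed work item that evicts all queues of a process: it waits for
 * any pending restore work, evicts the queues, signals the eviction
 * fence and schedules the restore work.
 */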
static void evict_process_worker(struct work_struct *work)
{
        int ret;
        struct kfd_process *p;
        struct delayed_work *dwork;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, eviction_work);
        WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
                  "Eviction fence mismatch\n");

        /* A narrow window of overlap between restore and evict work
         * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
         * unreserves the KFD BOs, the process can be evicted again, but
         * restore still has a few more steps to finish. So wait for any
         * previous restore work to complete.
         */
        flush_delayed_work(&p->restore_work);

        pr_debug("Started evicting pasid %d\n", p->pasid);
        ret = kfd_process_evict_queues(p);
        if (!ret) {
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
                queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

                pr_debug("Finished evicting pasid %d\n", p->pasid);
        } else
                pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

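/* Delayed work item that restores the BOs and queues of an evicted
 * process; it reschedules itself if restoring the BOs fails, e.g. under
 * memory pressure.
 */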
static void restore_process_worker(struct work_struct *work)
{
        struct delayed_work *dwork;
        struct kfd_process *p;
        struct kfd_process_device *pdd;
        int ret = 0;

        dwork = to_delayed_work(work);

        /* Process termination destroys this worker thread. So during the
         * lifetime of this thread, kfd_process p will be valid
         */
        p = container_of(dwork, struct kfd_process, restore_work);

        /* Call restore_process_bos on the first KGD device. This function
         * takes care of restoring the whole process including other devices.
         * Restore can fail if not enough memory is available. If so,
         * reschedule again.
         */
        pdd = list_first_entry(&p->per_device_data,
                               struct kfd_process_device,
                               per_device_list);

        pr_debug("Started restoring pasid %d\n", p->pasid);

        /* Set last_restore_timestamp before the restoration has actually
         * succeeded. Otherwise it would have to be set by KGD
         * (restore_process_bos) before the KFD BOs are unreserved; if not,
         * the process could be evicted again before the timestamp is set.
         * If the restore fails, the timestamp is set again on the next
         * attempt. This means the minimum GPU quantum would be
         * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
         * following two functions.
         */
        p->last_restore_timestamp = get_jiffies_64();
        ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
                                                     &p->ef);
        if (ret) {
                pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
                         p->pasid, PROCESS_BACK_OFF_TIME_MS);
                ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
                                msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
                WARN(!ret, "reschedule restore work failed\n");
                return;
        }

        ret = kfd_process_restore_queues(p);
        if (!ret)
                pr_debug("Finished restoring pasid %d\n", p->pasid);
        else
                pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

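/* Evict the queues of every known process and signal their eviction
 * fences, e.g. when the KFD devices are being suspended.
 */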
void kfd_suspend_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                cancel_delayed_work_sync(&p->eviction_work);
                cancel_delayed_work_sync(&p->restore_work);

                if (kfd_process_evict_queues(p))
                        pr_err("Failed to suspend process %d\n", p->pasid);
                dma_fence_signal(p->ef);
                dma_fence_put(p->ef);
                p->ef = NULL;
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
}

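/* Schedule restore work for every known process. Returns -EFAULT if
 * the work could not be queued for one of them.
 */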
int kfd_resume_all_processes(void)
{
        struct kfd_process *p;
        unsigned int temp;
        int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
                        pr_err("Restore process %d failed during resume\n",
                               p->pasid);
                        ret = -EFAULT;
                }
        }
        srcu_read_unlock(&kfd_processes_srcu, idx);
        return ret;
}

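/* Allocate the per-process CWSR trap handler buffer and map it into the
 * user process through the reserved-memory mmap offset.
 */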
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                          struct vm_area_struct *vma)
{
        struct kfd_process_device *pdd;
        struct qcm_process_device *qpd;

        if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
                pr_err("Incorrect CWSR mapping size.\n");
                return -EINVAL;
        }

        pdd = kfd_get_process_device_data(dev, process);
        if (!pdd)
                return -EINVAL;
        qpd = &pdd->qpd;

        qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                        get_order(KFD_CWSR_TBA_TMA_SIZE));
        if (!qpd->cwsr_kaddr) {
                pr_err("Error allocating per process CWSR buffer.\n");
                return -ENOMEM;
        }

        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
                | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
        /* Mapping pages to user process */
        return remap_pfn_range(vma, vma->vm_start,
                               PFN_DOWN(__pa(qpd->cwsr_kaddr)),
                               KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

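/* Invalidate the GPU TLB entries of this process: by VMID when running
 * without the hardware scheduler (no HWS), otherwise by PASID.
 */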
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
        struct kfd_dev *dev = pdd->dev;
        const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
                /* Nothing to flush until a VMID is assigned, which
                 * only happens when the first queue is created.
                 */
                if (pdd->qpd.vmid)
                        f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
        } else {
                f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
        }
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
        struct kfd_process *p;
        unsigned int temp;
        int r = 0;

        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                seq_printf(m, "Process %d PASID %d:\n",
                           p->lead_thread->tgid, p->pasid);

                mutex_lock(&p->mutex);
                r = pqm_debugfs_mqds(m, &p->pqm);
                mutex_unlock(&p->mutex);

                if (r)
                        break;
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return r;
}

#endif