kfd_process.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_dbgmgr.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

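/*
 * Deferred release: the final teardown of a kfd_process cannot run in the
 * SRCU callback that triggers it, so the process pointer is packaged into
 * a work item and handed to kfd_process_wq instead.
 */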
struct kfd_process_release_work {
	struct work_struct kfd_work;
	struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

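/* Allocate the process-release workqueue once; safe to call repeatedly. */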
void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

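/*
 * Create the kfd_process for the calling task, or return the existing one
 * if a prior open of /dev/kfd already created it.
 */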
struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/* Take mmap_sem because we call __mmu_notifier_register inside */
	down_write(&thread->mm->mmap_sem);

	/*
	 * Take the KFD processes mutex before starting process creation so
	 * that two threads of the same process cannot each create their own
	 * kfd_process structure.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread);

	mutex_unlock(&kfd_processes_mutex);
	up_write(&thread->mm->mmap_sem);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	return find_process(thread);
}

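/*
 * Hash-table lookup keyed by mm_struct. Callers must hold either
 * kfd_processes_mutex or an SRCU read lock on kfd_processes_srcu.
 */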
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
				   kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

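/*
 * Runs in workqueue context, where it is safe to take mutexes: unbind the
 * PASID from the IOMMU on each device, tear down the per-device data, then
 * free everything the process still owns.
 */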
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process_release_work *my_work;
	struct kfd_process_device *pdd, *temp;
	struct kfd_process *p;

	my_work = container_of(work, struct kfd_process_release_work,
			       kfd_work);
	p = my_work->p;

	pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);

	mutex_lock(&p->mutex);

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
			 pdd->dev->id, p->pasid);

		if (pdd->bound == PDD_BOUND)
			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);

		list_del(&pdd->per_device_list);
		kfree(pdd);
	}

	kfd_event_free_process(p);
	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_unlock(&p->mutex);
	mutex_destroy(&p->mutex);

	kfree(p);
	kfree(my_work);
}

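/*
 * SRCU callback: runs once every SRCU read-side critical section that could
 * still observe this process has finished. The allocation uses GFP_ATOMIC so
 * the callback never sleeps; note that if it fails, the process structure
 * is leaked.
 */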
static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process_release_work *work;
	struct kfd_process *p;

	p = container_of(rcu, struct kfd_process, rcu);
	mmdrop(p->mm);

	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(&work->kfd_work, kfd_process_wq_release);
		work->p = p;
		queue_work(kfd_process_wq, &work->kfd_work);
	}
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					 struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed here because the
	 * mmu_notifier SRCU is read-locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	mutex_lock(&p->mutex);

	/*
	 * Iterate over all process device data structures. If a pdd is in
	 * debug mode, force unregistration first so that its queues can be
	 * destroyed afterwards.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	mutex_unlock(&p->mutex);

	/*
	 * Because we drop mm_count inside kfd_process_destroy_delayed
	 * and because mmu_notifier_unregister also drops mm_count, we
	 * need to take an extra count here.
	 */
	mmgrab(p->mm);
	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

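/*
 * Allocate and initialize a new kfd_process. Called with mmap_sem held for
 * write (required by __mmu_notifier_register) and with kfd_processes_mutex
 * held, so only one thread of a process can reach this point.
 */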
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
		     (uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	return process;

err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

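/*
 * Look up the per-device data (pdd) this process holds for a given device.
 * Callers are expected to hold p->mutex, or to otherwise serialize against
 * changes to the per_device_data list.
 */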
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
						       struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							  struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						      struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	if (pdd->bound == PDD_BOUND)
		return pdd;

	if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
		pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
		return ERR_PTR(-EINVAL);
	}

	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
	if (err < 0)
		return ERR_PTR(err);

	pdd->bound = PDD_BOUND;

	return pdd;
}

/*
 * Bind processes to the device that have been temporarily unbound
 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
 */
int kfd_bind_processes_to_device(struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p;
	unsigned int temp;
	int err = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		mutex_lock(&p->mutex);
		pdd = kfd_get_process_device_data(dev, p);

		/* The process may never have opened this device. */
		if (!pdd || pdd->bound != PDD_BOUND_SUSPENDED) {
			mutex_unlock(&p->mutex);
			continue;
		}

		err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
					   p->lead_thread);
		if (err < 0) {
			pr_err("Unexpected pasid %d binding failure\n",
			       p->pasid);
			mutex_unlock(&p->mutex);
			break;
		}

		pdd->bound = PDD_BOUND;
		mutex_unlock(&p->mutex);
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return err;
}

/*
 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
 * processes will be restored to PDD_BOUND state in
 * kfd_bind_processes_to_device.
 */
void kfd_unbind_processes_from_device(struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		mutex_lock(&p->mutex);
		pdd = kfd_get_process_device_data(dev, p);

		/* As above, a process may not have a pdd for this device. */
		if (pdd && pdd->bound == PDD_BOUND)
			pdd->bound = PDD_BOUND_SUSPENDED;

		mutex_unlock(&p->mutex);
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);
}

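/*
 * Called when the IOMMU driver unbinds a PASID from the device behind
 * amdkfd's back. Detach any debug-manager state and dequeue the process's
 * queues while the PASID is still valid. kfd_lookup_process_by_pasid
 * returns with p->mutex held, which is dropped at the end.
 */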
void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;

	pr_debug("Unbinding process %d from IOMMU\n", pasid);

	mutex_lock(kfd_get_dbgmgr_mutex());

	if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
		if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
			kfd_dbgmgr_destroy(dev->dbgmgr);
			dev->dbgmgr = NULL;
		}
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	pdd = kfd_get_process_device_data(dev, p);
	if (pdd)
		/* For GPU relying on IOMMU, we need to dequeue here
		 * when PASID is still bound.
		 */
		kfd_process_dequeue_from_device(pdd);

	mutex_unlock(&p->mutex);
}

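/* Iteration helpers over a process's per-device data list. */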
struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;

	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !list_empty(&p->per_device_data);
}

/*
 * Look up a process by PASID. On success, returns with process->mutex
 * locked; returns NULL if no process matches.
 */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			mutex_lock(&p->mutex);
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}