kfd_process.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_dbgmgr.h"

/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded, up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

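/*
 * Locking scheme for the table, as implemented below: writers take
 * kfd_processes_mutex around hash_add_rcu()/hash_del_rcu(), lookups run
 * under kfd_processes_srcu (see find_process()), and
 * kfd_process_notifier_release() calls synchronize_srcu() after unhashing,
 * so a reader can never see a kfd_process that has already been freed.
 */
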
static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
        struct work_struct kfd_work;

        struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

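/*
 * Allocate the module-wide workqueue used to run deferred process-release
 * work. kfd_create_process() BUG_ONs if this has not run yet, so it is
 * expected to be called once during driver initialization, with
 * kfd_process_destroy_wq() as its teardown counterpart.
 */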
void kfd_process_create_wq(void)
{
        if (!kfd_process_wq)
                kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
        if (kfd_process_wq) {
                destroy_workqueue(kfd_process_wq);
                kfd_process_wq = NULL;
        }
}

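/*
 * Find or create the kfd_process for @thread's address space. Both the
 * lookup and the creation run under kfd_processes_mutex, so two threads
 * of one process racing here still end up sharing a single kfd_process.
 * mmap_sem is write-locked because create_process() calls
 * __mmu_notifier_register(), which requires it.
 */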
struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        BUG_ON(!kfd_process_wq);

        if (thread->mm == NULL)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        /* Take mmap_sem because we call __mmu_notifier_register inside */
        down_write(&thread->mm->mmap_sem);

        /*
         * Take the kfd_processes_mutex before starting process creation
         * so that two threads of the same process cannot race and create
         * two kfd_process structures.
         */
        mutex_lock(&kfd_processes_mutex);

        /* A prior open of /dev/kfd could have already created the process. */
        process = find_process(thread);
        if (process)
                pr_debug("kfd: process already found\n");

        if (!process)
                process = create_process(thread);

        mutex_unlock(&kfd_processes_mutex);

        up_write(&thread->mm->mmap_sem);

        return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
        struct kfd_process *process;

        if (thread->mm == NULL)
                return ERR_PTR(-EINVAL);

        /* Only the pthreads threading model is supported. */
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);

        process = find_process(thread);

        return process;
}

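/*
 * The hash key is the mm_struct pointer value, so a bucket can contain
 * unrelated processes whose keys collide; the process->mm comparison
 * below disambiguates. The lookup is expected to run under
 * kfd_processes_srcu (or kfd_processes_mutex), as find_process() does.
 */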
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
        struct kfd_process *process;

        hash_for_each_possible_rcu(kfd_processes_table, process,
                                        kfd_processes, (uintptr_t)mm)
                if (process->mm == mm)
                        return process;

        return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
        struct kfd_process *p;
        int idx;

        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}

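/*
 * Bottom half of process teardown. By the time this runs in
 * kfd_process_wq, the process is unhashed and the SRCU grace period has
 * passed (see kfd_process_notifier_release() and
 * kfd_process_destroy_delayed()), so nothing else can look the process
 * up and it is safe to unbind the PASID and free everything.
 */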
static void kfd_process_wq_release(struct work_struct *work)
{
        struct kfd_process_release_work *my_work;
        struct kfd_process_device *pdd, *temp;
        struct kfd_process *p;

        /* Recover the release-work item from its embedded work_struct. */
        my_work = container_of(work, struct kfd_process_release_work,
                                kfd_work);
        p = my_work->p;

        pr_debug("Releasing process (pasid %d) in workqueue\n",
                        p->pasid);

        mutex_lock(&p->mutex);

        list_for_each_entry_safe(pdd, temp, &p->per_device_data,
                                        per_device_list) {
                pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
                                pdd->dev->id, p->pasid);

                if (pdd->reset_wavefronts)
                        dbgdev_wave_reset_wavefronts(pdd->dev, p);

                amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
                list_del(&pdd->per_device_list);
                kfree(pdd);
        }

        kfd_event_free_process(p);

        kfd_pasid_free(p->pasid);

        mutex_unlock(&p->mutex);

        mutex_destroy(&p->mutex);

        kfree(p->queues);

        kfree(p);

        kfree(my_work);
}

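/*
 * SRCU callback; it runs in atomic context, which is why the release
 * work is allocated with GFP_ATOMIC and the sleeping teardown (mutexes,
 * IOMMU unbind) is punted to kfd_process_wq rather than done here.
 */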
static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
        struct kfd_process_release_work *work;
        struct kfd_process *p;

        BUG_ON(!kfd_process_wq);

        p = container_of(rcu, struct kfd_process, rcu);

        BUG_ON(atomic_read(&p->mm->mm_count) <= 0);

        mmdrop(p->mm);

        work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

        if (work) {
                INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
                work->p = p;
                queue_work(kfd_process_wq, (struct work_struct *) work);
        }
}

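/*
 * The .release hook of the mmu_notifier registered in create_process(),
 * called when the process's address space is torn down. It unhashes the
 * process, drains per-device debug/wavefront state, and schedules the
 * actual freeing through an SRCU grace period.
 */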
static void kfd_process_notifier_release(struct mmu_notifier *mn,
                                        struct mm_struct *mm)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd = NULL;

        /*
         * The kfd_process structure cannot be freed here because the
         * mmu_notifier SRCU is read-locked.
         */
        p = container_of(mn, struct kfd_process, mmu_notifier);
        BUG_ON(p->mm != mm);

        mutex_lock(&kfd_processes_mutex);
        hash_del_rcu(&p->kfd_processes);
        mutex_unlock(&kfd_processes_mutex);
        synchronize_srcu(&kfd_processes_srcu);

        mutex_lock(&p->mutex);

        /* In case our notifier is called before the IOMMU notifier */
        pqm_uninit(&p->pqm);

        /*
         * Iterate over all process device data structures and check
         * if we should delete debug managers and reset all wavefronts.
         */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                if ((pdd->dev->dbgmgr) &&
                                (pdd->dev->dbgmgr->pasid == p->pasid))
                        kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

                if (pdd->reset_wavefronts) {
                        pr_warn("amdkfd: Resetting all wave fronts\n");
                        dbgdev_wave_reset_wavefronts(pdd->dev, p);
                        pdd->reset_wavefronts = false;
                }
        }

        mutex_unlock(&p->mutex);

        /*
         * Because we drop mm_count inside kfd_process_destroy_delayed
         * and because the mmu_notifier_unregister function also drops
         * mm_count, we need to take an extra reference here.
         */
        mmgrab(p->mm);
        mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
        mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
        .release = kfd_process_notifier_release,
};

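/*
 * Allocate and initialize a kfd_process for @thread. The caller holds
 * kfd_processes_mutex and the task's mmap_sem (see kfd_create_process()).
 * The error labels unwind in strict reverse order of the setup steps,
 * including a synchronize_rcu() after unhashing so that no concurrent
 * lookup can still see the process when the notifier is unregistered.
 */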
static struct kfd_process *create_process(const struct task_struct *thread)
{
        struct kfd_process *process;
        int err = -ENOMEM;

        process = kzalloc(sizeof(*process), GFP_KERNEL);

        if (!process)
                goto err_alloc_process;

        process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
                                        sizeof(process->queues[0]), GFP_KERNEL);

        if (!process->queues)
                goto err_alloc_queues;

        process->pasid = kfd_pasid_alloc();
        if (process->pasid == 0)
                goto err_alloc_pasid;

        mutex_init(&process->mutex);

        process->mm = thread->mm;

        /* register notifier */
        process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
        err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
        if (err)
                goto err_mmu_notifier;

        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                        (uintptr_t)process->mm);

        process->lead_thread = thread->group_leader;

        process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

        INIT_LIST_HEAD(&process->per_device_data);

        kfd_event_init_process(process);

        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;

        /* init process apertures */
        process->is_32bit_user_mode = in_compat_syscall();
        err = kfd_init_apertures(process);
        if (err != 0)
                goto err_init_apertures;

        return process;

err_init_apertures:
        pqm_uninit(&process->pqm);
err_process_pqm_init:
        hash_del_rcu(&process->kfd_processes);
        synchronize_rcu();
        mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
        mutex_destroy(&process->mutex);
        kfd_pasid_free(process->pasid);
err_alloc_pasid:
        kfree(process->queues);
err_alloc_queues:
        kfree(process);
err_alloc_process:
        return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                                                        struct kfd_process *p)
{
        struct kfd_process_device *pdd;

        list_for_each_entry(pdd, &p->per_device_data, per_device_list)
                if (pdd->dev == dev)
                        return pdd;

        /*
         * Return NULL when no entry matches; note that after a completed
         * list_for_each_entry() the cursor points at the list head, not
         * at a valid entry, so it must not be returned as-is.
         */
        return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                        struct kfd_process *p)
{
        struct kfd_process_device *pdd = NULL;

        pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
        if (pdd != NULL) {
                pdd->dev = dev;
                INIT_LIST_HEAD(&pdd->qpd.queues_list);
                INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
                pdd->qpd.dqm = dev->dqm;
                pdd->reset_wavefronts = false;
                list_add(&pdd->per_device_list, &p->per_device_data);
        }

        return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                                                        struct kfd_process *p)
{
        struct kfd_process_device *pdd;
        int err;

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                return ERR_PTR(-ENOMEM);
        }

        if (pdd->bound)
                return pdd;

        err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
        if (err < 0)
                return ERR_PTR(err);

        pdd->bound = true;

        return pdd;
}

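/*
 * A sketch of the expected calling pattern for the bind above (the
 * caller shown is illustrative, not from this file); the process lock
 * must already be held, per the comment on the function:
 *
 *      mutex_lock(&p->mutex);
 *      pdd = kfd_bind_process_to_device(dev, p);
 *      if (IS_ERR(pdd)) {
 *              mutex_unlock(&p->mutex);
 *              return PTR_ERR(pdd);
 *      }
 *      ...
 *      mutex_unlock(&p->mutex);
 */
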
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
        struct kfd_process *p;
        struct kfd_process_device *pdd;

        BUG_ON(dev == NULL);

        /*
         * Look for the process that matches the pasid. If there is no such
         * process, we either released it in amdkfd's own notifier, or there
         * is a bug. Unfortunately, there is no way to tell...
         */
        p = kfd_lookup_process_by_pasid(pasid);
        if (!p)
                return;

        pr_debug("Unbinding process %d from IOMMU\n", pasid);

        if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
                kfd_dbgmgr_destroy(dev->dbgmgr);

        pqm_uninit(&p->pqm);

        pdd = kfd_get_process_device_data(dev, p);

        if (!pdd) {
                mutex_unlock(&p->mutex);
                return;
        }

        if (pdd->reset_wavefronts) {
                dbgdev_wave_reset_wavefronts(pdd->dev, p);
                pdd->reset_wavefronts = false;
        }

        /*
         * Just mark pdd as unbound, because we still need it to call
         * amd_iommu_unbind_pasid() when the process exits.
         * We don't call amd_iommu_unbind_pasid() here
         * because the IOMMU called us.
         */
        pdd->bound = false;

        mutex_unlock(&p->mutex);
}

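/*
 * Iterator helpers over a process's per-device data. A typical loop
 * (illustrative sketch, not a caller from this file):
 *
 *      struct kfd_process_device *pdd;
 *
 *      for (pdd = kfd_get_first_process_device_data(p); pdd;
 *           pdd = kfd_get_next_process_device_data(p, pdd))
 *              do_something(pdd);
 *
 * Note that kfd_get_first_process_device_data() assumes a non-empty
 * list; check kfd_has_process_device_data() first.
 */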
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
{
        return list_first_entry(&p->per_device_data,
                                struct kfd_process_device,
                                per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
                                                struct kfd_process_device *pdd)
{
        if (list_is_last(&pdd->per_device_list, &p->per_device_data))
                return NULL;
        return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
        return !(list_empty(&p->per_device_data));
}

/* This returns with process->mutex locked. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
        struct kfd_process *p;
        unsigned int temp;
        int idx = srcu_read_lock(&kfd_processes_srcu);

        hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
                if (p->pasid == pasid) {
                        mutex_lock(&p->mutex);
                        break;
                }
        }

        srcu_read_unlock(&kfd_processes_srcu, idx);

        return p;
}
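
/*
 * A sketch of the expected calling pattern for the lookup above
 * (illustrative, not a caller from this file): the lookup returns with
 * the process mutex held, so every exit path afterwards must unlock it,
 * as kfd_unbind_process_from_device() does.
 *
 *      p = kfd_lookup_process_by_pasid(pasid);
 *      if (!p)
 *              return;
 *      ... use p ...
 *      mutex_unlock(&p->mutex);
 */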