kfd_device.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. #include <linux/amd-iommu.h>
  23. #include <linux/bsearch.h>
  24. #include <linux/pci.h>
  25. #include <linux/slab.h>
  26. #include "kfd_priv.h"
  27. #include "kfd_device_queue_manager.h"
  28. #include "kfd_pm4_headers.h"
  29. #define MQD_SIZE_ALIGNED 768
  30. static const struct kfd_device_info kaveri_device_info = {
  31. .asic_family = CHIP_KAVERI,
  32. .max_pasid_bits = 16,
  33. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  34. .mqd_size_aligned = MQD_SIZE_ALIGNED
  35. };
  36. static const struct kfd_device_info carrizo_device_info = {
  37. .asic_family = CHIP_CARRIZO,
  38. .max_pasid_bits = 16,
  39. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  40. .num_of_watch_points = 4,
  41. .mqd_size_aligned = MQD_SIZE_ALIGNED
  42. };
  43. struct kfd_deviceid {
  44. unsigned short did;
  45. const struct kfd_device_info *device_info;
  46. };
  47. /* Please keep this sorted by increasing device id. */
  48. static const struct kfd_deviceid supported_devices[] = {
  49. { 0x1304, &kaveri_device_info }, /* Kaveri */
  50. { 0x1305, &kaveri_device_info }, /* Kaveri */
  51. { 0x1306, &kaveri_device_info }, /* Kaveri */
  52. { 0x1307, &kaveri_device_info }, /* Kaveri */
  53. { 0x1309, &kaveri_device_info }, /* Kaveri */
  54. { 0x130A, &kaveri_device_info }, /* Kaveri */
  55. { 0x130B, &kaveri_device_info }, /* Kaveri */
  56. { 0x130C, &kaveri_device_info }, /* Kaveri */
  57. { 0x130D, &kaveri_device_info }, /* Kaveri */
  58. { 0x130E, &kaveri_device_info }, /* Kaveri */
  59. { 0x130F, &kaveri_device_info }, /* Kaveri */
  60. { 0x1310, &kaveri_device_info }, /* Kaveri */
  61. { 0x1311, &kaveri_device_info }, /* Kaveri */
  62. { 0x1312, &kaveri_device_info }, /* Kaveri */
  63. { 0x1313, &kaveri_device_info }, /* Kaveri */
  64. { 0x1315, &kaveri_device_info }, /* Kaveri */
  65. { 0x1316, &kaveri_device_info }, /* Kaveri */
  66. { 0x1317, &kaveri_device_info }, /* Kaveri */
  67. { 0x1318, &kaveri_device_info }, /* Kaveri */
  68. { 0x131B, &kaveri_device_info }, /* Kaveri */
  69. { 0x131C, &kaveri_device_info }, /* Kaveri */
  70. { 0x131D, &kaveri_device_info } /* Kaveri */
  71. };
  72. static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
  73. unsigned int chunk_size);
  74. static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
  75. static const struct kfd_device_info *lookup_device_info(unsigned short did)
  76. {
  77. size_t i;
  78. for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
  79. if (supported_devices[i].did == did) {
  80. BUG_ON(supported_devices[i].device_info == NULL);
  81. return supported_devices[i].device_info;
  82. }
  83. }
  84. return NULL;
  85. }
  86. struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
  87. struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
  88. {
  89. struct kfd_dev *kfd;
  90. const struct kfd_device_info *device_info =
  91. lookup_device_info(pdev->device);
  92. if (!device_info)
  93. return NULL;
  94. kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
  95. if (!kfd)
  96. return NULL;
  97. kfd->kgd = kgd;
  98. kfd->device_info = device_info;
  99. kfd->pdev = pdev;
  100. kfd->init_complete = false;
  101. kfd->kfd2kgd = f2g;
  102. mutex_init(&kfd->doorbell_mutex);
  103. memset(&kfd->doorbell_available_index, 0,
  104. sizeof(kfd->doorbell_available_index));
  105. return kfd;
  106. }
  107. static bool device_iommu_pasid_init(struct kfd_dev *kfd)
  108. {
  109. const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
  110. AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
  111. AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
  112. struct amd_iommu_device_info iommu_info;
  113. unsigned int pasid_limit;
  114. int err;
  115. err = amd_iommu_device_info(kfd->pdev, &iommu_info);
  116. if (err < 0) {
  117. dev_err(kfd_device,
  118. "error getting iommu info. is the iommu enabled?\n");
  119. return false;
  120. }
  121. if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
  122. dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
  123. (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
  124. (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
  125. (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
  126. return false;
  127. }
  128. pasid_limit = min_t(unsigned int,
  129. (unsigned int)1 << kfd->device_info->max_pasid_bits,
  130. iommu_info.max_pasids);
  131. /*
  132. * last pasid is used for kernel queues doorbells
  133. * in the future the last pasid might be used for a kernel thread.
  134. */
  135. pasid_limit = min_t(unsigned int,
  136. pasid_limit,
  137. kfd->doorbell_process_limit - 1);
  138. err = amd_iommu_init_device(kfd->pdev, pasid_limit);
  139. if (err < 0) {
  140. dev_err(kfd_device, "error initializing iommu device\n");
  141. return false;
  142. }
  143. if (!kfd_set_pasid_limit(pasid_limit)) {
  144. dev_err(kfd_device, "error setting pasid limit\n");
  145. amd_iommu_free_device(kfd->pdev);
  146. return false;
  147. }
  148. return true;
  149. }
  150. static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
  151. {
  152. struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
  153. if (dev)
  154. kfd_unbind_process_from_device(dev, pasid);
  155. }
  156. bool kgd2kfd_device_init(struct kfd_dev *kfd,
  157. const struct kgd2kfd_shared_resources *gpu_resources)
  158. {
  159. unsigned int size;
  160. kfd->shared_resources = *gpu_resources;
  161. /* calculate max size of mqds needed for queues */
  162. size = max_num_of_queues_per_device *
  163. kfd->device_info->mqd_size_aligned;
  164. /*
  165. * calculate max size of runlist packet.
  166. * There can be only 2 packets at once
  167. */
  168. size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
  169. max_num_of_queues_per_device *
  170. sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
  171. /* Add size of HIQ & DIQ */
  172. size += KFD_KERNEL_QUEUE_SIZE * 2;
  173. /* add another 512KB for all other allocations on gart (HPD, fences) */
  174. size += 512 * 1024;
  175. if (kfd->kfd2kgd->init_gtt_mem_allocation(
  176. kfd->kgd, size, &kfd->gtt_mem,
  177. &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
  178. dev_err(kfd_device,
  179. "Could not allocate %d bytes for device (%x:%x)\n",
  180. size, kfd->pdev->vendor, kfd->pdev->device);
  181. goto out;
  182. }
  183. dev_info(kfd_device,
  184. "Allocated %d bytes on gart for device(%x:%x)\n",
  185. size, kfd->pdev->vendor, kfd->pdev->device);
  186. /* Initialize GTT sa with 512 byte chunk size */
  187. if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
  188. dev_err(kfd_device,
  189. "Error initializing gtt sub-allocator\n");
  190. goto kfd_gtt_sa_init_error;
  191. }
  192. kfd_doorbell_init(kfd);
  193. if (kfd_topology_add_device(kfd) != 0) {
  194. dev_err(kfd_device,
  195. "Error adding device (%x:%x) to topology\n",
  196. kfd->pdev->vendor, kfd->pdev->device);
  197. goto kfd_topology_add_device_error;
  198. }
  199. if (!device_iommu_pasid_init(kfd)) {
  200. dev_err(kfd_device,
  201. "Error initializing iommuv2 for device (%x:%x)\n",
  202. kfd->pdev->vendor, kfd->pdev->device);
  203. goto device_iommu_pasid_error;
  204. }
  205. amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
  206. iommu_pasid_shutdown_callback);
  207. kfd->dqm = device_queue_manager_init(kfd);
  208. if (!kfd->dqm) {
  209. dev_err(kfd_device,
  210. "Error initializing queue manager for device (%x:%x)\n",
  211. kfd->pdev->vendor, kfd->pdev->device);
  212. goto device_queue_manager_error;
  213. }
  214. if (kfd->dqm->ops.start(kfd->dqm) != 0) {
  215. dev_err(kfd_device,
  216. "Error starting queuen manager for device (%x:%x)\n",
  217. kfd->pdev->vendor, kfd->pdev->device);
  218. goto dqm_start_error;
  219. }
  220. kfd->init_complete = true;
  221. dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
  222. kfd->pdev->device);
  223. pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
  224. sched_policy);
  225. goto out;
  226. dqm_start_error:
  227. device_queue_manager_uninit(kfd->dqm);
  228. device_queue_manager_error:
  229. amd_iommu_free_device(kfd->pdev);
  230. device_iommu_pasid_error:
  231. kfd_topology_remove_device(kfd);
  232. kfd_topology_add_device_error:
  233. kfd_gtt_sa_fini(kfd);
  234. kfd_gtt_sa_init_error:
  235. kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
  236. dev_err(kfd_device,
  237. "device (%x:%x) NOT added due to errors\n",
  238. kfd->pdev->vendor, kfd->pdev->device);
  239. out:
  240. return kfd->init_complete;
  241. }
  242. void kgd2kfd_device_exit(struct kfd_dev *kfd)
  243. {
  244. if (kfd->init_complete) {
  245. device_queue_manager_uninit(kfd->dqm);
  246. amd_iommu_free_device(kfd->pdev);
  247. kfd_topology_remove_device(kfd);
  248. kfd_gtt_sa_fini(kfd);
  249. kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
  250. }
  251. kfree(kfd);
  252. }
  253. void kgd2kfd_suspend(struct kfd_dev *kfd)
  254. {
  255. BUG_ON(kfd == NULL);
  256. if (kfd->init_complete) {
  257. kfd->dqm->ops.stop(kfd->dqm);
  258. amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
  259. amd_iommu_free_device(kfd->pdev);
  260. }
  261. }
  262. int kgd2kfd_resume(struct kfd_dev *kfd)
  263. {
  264. unsigned int pasid_limit;
  265. int err;
  266. BUG_ON(kfd == NULL);
  267. pasid_limit = kfd_get_pasid_limit();
  268. if (kfd->init_complete) {
  269. err = amd_iommu_init_device(kfd->pdev, pasid_limit);
  270. if (err < 0)
  271. return -ENXIO;
  272. amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
  273. iommu_pasid_shutdown_callback);
  274. kfd->dqm->ops.start(kfd->dqm);
  275. }
  276. return 0;
  277. }
  278. /* This is called directly from KGD at ISR. */
  279. void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
  280. {
  281. /* Process interrupts / schedule work as necessary */
  282. }
  283. static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
  284. unsigned int chunk_size)
  285. {
  286. unsigned int num_of_bits;
  287. BUG_ON(!kfd);
  288. BUG_ON(!kfd->gtt_mem);
  289. BUG_ON(buf_size < chunk_size);
  290. BUG_ON(buf_size == 0);
  291. BUG_ON(chunk_size == 0);
  292. kfd->gtt_sa_chunk_size = chunk_size;
  293. kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
  294. num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
  295. BUG_ON(num_of_bits == 0);
  296. kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
  297. if (!kfd->gtt_sa_bitmap)
  298. return -ENOMEM;
  299. pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
  300. kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
  301. mutex_init(&kfd->gtt_sa_lock);
  302. return 0;
  303. }
  304. static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
  305. {
  306. mutex_destroy(&kfd->gtt_sa_lock);
  307. kfree(kfd->gtt_sa_bitmap);
  308. }
  309. static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
  310. unsigned int bit_num,
  311. unsigned int chunk_size)
  312. {
  313. return start_addr + bit_num * chunk_size;
  314. }
  315. static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
  316. unsigned int bit_num,
  317. unsigned int chunk_size)
  318. {
  319. return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
  320. }
  321. int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
  322. struct kfd_mem_obj **mem_obj)
  323. {
  324. unsigned int found, start_search, cur_size;
  325. BUG_ON(!kfd);
  326. if (size == 0)
  327. return -EINVAL;
  328. if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
  329. return -ENOMEM;
  330. *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
  331. if ((*mem_obj) == NULL)
  332. return -ENOMEM;
  333. pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
  334. start_search = 0;
  335. mutex_lock(&kfd->gtt_sa_lock);
  336. kfd_gtt_restart_search:
  337. /* Find the first chunk that is free */
  338. found = find_next_zero_bit(kfd->gtt_sa_bitmap,
  339. kfd->gtt_sa_num_of_chunks,
  340. start_search);
  341. pr_debug("kfd: found = %d\n", found);
  342. /* If there wasn't any free chunk, bail out */
  343. if (found == kfd->gtt_sa_num_of_chunks)
  344. goto kfd_gtt_no_free_chunk;
  345. /* Update fields of mem_obj */
  346. (*mem_obj)->range_start = found;
  347. (*mem_obj)->range_end = found;
  348. (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
  349. kfd->gtt_start_gpu_addr,
  350. found,
  351. kfd->gtt_sa_chunk_size);
  352. (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
  353. kfd->gtt_start_cpu_ptr,
  354. found,
  355. kfd->gtt_sa_chunk_size);
  356. pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
  357. (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
  358. /* If we need only one chunk, mark it as allocated and get out */
  359. if (size <= kfd->gtt_sa_chunk_size) {
  360. pr_debug("kfd: single bit\n");
  361. set_bit(found, kfd->gtt_sa_bitmap);
  362. goto kfd_gtt_out;
  363. }
  364. /* Otherwise, try to see if we have enough contiguous chunks */
  365. cur_size = size - kfd->gtt_sa_chunk_size;
  366. do {
  367. (*mem_obj)->range_end =
  368. find_next_zero_bit(kfd->gtt_sa_bitmap,
  369. kfd->gtt_sa_num_of_chunks, ++found);
  370. /*
  371. * If next free chunk is not contiguous than we need to
  372. * restart our search from the last free chunk we found (which
  373. * wasn't contiguous to the previous ones
  374. */
  375. if ((*mem_obj)->range_end != found) {
  376. start_search = found;
  377. goto kfd_gtt_restart_search;
  378. }
  379. /*
  380. * If we reached end of buffer, bail out with error
  381. */
  382. if (found == kfd->gtt_sa_num_of_chunks)
  383. goto kfd_gtt_no_free_chunk;
  384. /* Check if we don't need another chunk */
  385. if (cur_size <= kfd->gtt_sa_chunk_size)
  386. cur_size = 0;
  387. else
  388. cur_size -= kfd->gtt_sa_chunk_size;
  389. } while (cur_size > 0);
  390. pr_debug("kfd: range_start = %d, range_end = %d\n",
  391. (*mem_obj)->range_start, (*mem_obj)->range_end);
  392. /* Mark the chunks as allocated */
  393. for (found = (*mem_obj)->range_start;
  394. found <= (*mem_obj)->range_end;
  395. found++)
  396. set_bit(found, kfd->gtt_sa_bitmap);
  397. kfd_gtt_out:
  398. mutex_unlock(&kfd->gtt_sa_lock);
  399. return 0;
  400. kfd_gtt_no_free_chunk:
  401. pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
  402. mutex_unlock(&kfd->gtt_sa_lock);
  403. kfree(mem_obj);
  404. return -ENOMEM;
  405. }
  406. int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
  407. {
  408. unsigned int bit;
  409. BUG_ON(!kfd);
  410. /* Act like kfree when trying to free a NULL object */
  411. if (!mem_obj)
  412. return 0;
  413. pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
  414. mem_obj, mem_obj->range_start, mem_obj->range_end);
  415. mutex_lock(&kfd->gtt_sa_lock);
  416. /* Mark the chunks as free */
  417. for (bit = mem_obj->range_start;
  418. bit <= mem_obj->range_end;
  419. bit++)
  420. clear_bit(bit, kfd->gtt_sa_bitmap);
  421. mutex_unlock(&kfd->gtt_sa_lock);
  422. kfree(mem_obj);
  423. return 0;
  424. }