kfd_device.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001
  1. /*
  2. * Copyright 2014 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. #include <linux/bsearch.h>
  23. #include <linux/pci.h>
  24. #include <linux/slab.h>
  25. #include "kfd_priv.h"
  26. #include "kfd_device_queue_manager.h"
  27. #include "kfd_pm4_headers_vi.h"
  28. #include "cwsr_trap_handler.h"
  29. #include "kfd_iommu.h"
/* Value used for .mqd_size_aligned by every device info table in this file. */
#define MQD_SIZE_ALIGNED 768
/*
 * kfd_locked is used to lock the kfd driver during suspend or reset.
 * Once locked, the kfd driver will stop any further GPU execution and
 * create process (open) will return -EAGAIN.
 *
 * It is a counter rather than a flag: kgd2kfd_suspend() increments it,
 * kgd2kfd_resume() and kgd2kfd_post_reset() decrement it, and
 * kfd_is_locked() reports whether it is greater than zero.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);
  37. #ifdef KFD_SUPPORT_IOMMU_V2
  38. static const struct kfd_device_info kaveri_device_info = {
  39. .asic_family = CHIP_KAVERI,
  40. .max_pasid_bits = 16,
  41. /* max num of queues for KV.TODO should be a dynamic value */
  42. .max_no_of_hqd = 24,
  43. .doorbell_size = 4,
  44. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  45. .event_interrupt_class = &event_interrupt_class_cik,
  46. .num_of_watch_points = 4,
  47. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  48. .supports_cwsr = false,
  49. .needs_iommu_device = true,
  50. .needs_pci_atomics = false,
  51. .num_sdma_engines = 2,
  52. .num_sdma_queues_per_engine = 2,
  53. };
  54. static const struct kfd_device_info carrizo_device_info = {
  55. .asic_family = CHIP_CARRIZO,
  56. .max_pasid_bits = 16,
  57. /* max num of queues for CZ.TODO should be a dynamic value */
  58. .max_no_of_hqd = 24,
  59. .doorbell_size = 4,
  60. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  61. .event_interrupt_class = &event_interrupt_class_cik,
  62. .num_of_watch_points = 4,
  63. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  64. .supports_cwsr = true,
  65. .needs_iommu_device = true,
  66. .needs_pci_atomics = false,
  67. .num_sdma_engines = 2,
  68. .num_sdma_queues_per_engine = 2,
  69. };
  70. static const struct kfd_device_info raven_device_info = {
  71. .asic_family = CHIP_RAVEN,
  72. .max_pasid_bits = 16,
  73. .max_no_of_hqd = 24,
  74. .doorbell_size = 8,
  75. .ih_ring_entry_size = 8 * sizeof(uint32_t),
  76. .event_interrupt_class = &event_interrupt_class_v9,
  77. .num_of_watch_points = 4,
  78. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  79. .supports_cwsr = true,
  80. .needs_iommu_device = true,
  81. .needs_pci_atomics = true,
  82. .num_sdma_engines = 1,
  83. .num_sdma_queues_per_engine = 2,
  84. };
  85. #endif
  86. static const struct kfd_device_info hawaii_device_info = {
  87. .asic_family = CHIP_HAWAII,
  88. .max_pasid_bits = 16,
  89. /* max num of queues for KV.TODO should be a dynamic value */
  90. .max_no_of_hqd = 24,
  91. .doorbell_size = 4,
  92. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  93. .event_interrupt_class = &event_interrupt_class_cik,
  94. .num_of_watch_points = 4,
  95. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  96. .supports_cwsr = false,
  97. .needs_iommu_device = false,
  98. .needs_pci_atomics = false,
  99. .num_sdma_engines = 2,
  100. .num_sdma_queues_per_engine = 2,
  101. };
  102. static const struct kfd_device_info tonga_device_info = {
  103. .asic_family = CHIP_TONGA,
  104. .max_pasid_bits = 16,
  105. .max_no_of_hqd = 24,
  106. .doorbell_size = 4,
  107. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  108. .event_interrupt_class = &event_interrupt_class_cik,
  109. .num_of_watch_points = 4,
  110. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  111. .supports_cwsr = false,
  112. .needs_iommu_device = false,
  113. .needs_pci_atomics = true,
  114. .num_sdma_engines = 2,
  115. .num_sdma_queues_per_engine = 2,
  116. };
  117. static const struct kfd_device_info fiji_device_info = {
  118. .asic_family = CHIP_FIJI,
  119. .max_pasid_bits = 16,
  120. .max_no_of_hqd = 24,
  121. .doorbell_size = 4,
  122. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  123. .event_interrupt_class = &event_interrupt_class_cik,
  124. .num_of_watch_points = 4,
  125. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  126. .supports_cwsr = true,
  127. .needs_iommu_device = false,
  128. .needs_pci_atomics = true,
  129. .num_sdma_engines = 2,
  130. .num_sdma_queues_per_engine = 2,
  131. };
  132. static const struct kfd_device_info fiji_vf_device_info = {
  133. .asic_family = CHIP_FIJI,
  134. .max_pasid_bits = 16,
  135. .max_no_of_hqd = 24,
  136. .doorbell_size = 4,
  137. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  138. .event_interrupt_class = &event_interrupt_class_cik,
  139. .num_of_watch_points = 4,
  140. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  141. .supports_cwsr = true,
  142. .needs_iommu_device = false,
  143. .needs_pci_atomics = false,
  144. .num_sdma_engines = 2,
  145. .num_sdma_queues_per_engine = 2,
  146. };
  147. static const struct kfd_device_info polaris10_device_info = {
  148. .asic_family = CHIP_POLARIS10,
  149. .max_pasid_bits = 16,
  150. .max_no_of_hqd = 24,
  151. .doorbell_size = 4,
  152. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  153. .event_interrupt_class = &event_interrupt_class_cik,
  154. .num_of_watch_points = 4,
  155. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  156. .supports_cwsr = true,
  157. .needs_iommu_device = false,
  158. .needs_pci_atomics = true,
  159. .num_sdma_engines = 2,
  160. .num_sdma_queues_per_engine = 2,
  161. };
  162. static const struct kfd_device_info polaris10_vf_device_info = {
  163. .asic_family = CHIP_POLARIS10,
  164. .max_pasid_bits = 16,
  165. .max_no_of_hqd = 24,
  166. .doorbell_size = 4,
  167. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  168. .event_interrupt_class = &event_interrupt_class_cik,
  169. .num_of_watch_points = 4,
  170. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  171. .supports_cwsr = true,
  172. .needs_iommu_device = false,
  173. .needs_pci_atomics = false,
  174. .num_sdma_engines = 2,
  175. .num_sdma_queues_per_engine = 2,
  176. };
  177. static const struct kfd_device_info polaris11_device_info = {
  178. .asic_family = CHIP_POLARIS11,
  179. .max_pasid_bits = 16,
  180. .max_no_of_hqd = 24,
  181. .doorbell_size = 4,
  182. .ih_ring_entry_size = 4 * sizeof(uint32_t),
  183. .event_interrupt_class = &event_interrupt_class_cik,
  184. .num_of_watch_points = 4,
  185. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  186. .supports_cwsr = true,
  187. .needs_iommu_device = false,
  188. .needs_pci_atomics = true,
  189. .num_sdma_engines = 2,
  190. .num_sdma_queues_per_engine = 2,
  191. };
  192. static const struct kfd_device_info vega10_device_info = {
  193. .asic_family = CHIP_VEGA10,
  194. .max_pasid_bits = 16,
  195. .max_no_of_hqd = 24,
  196. .doorbell_size = 8,
  197. .ih_ring_entry_size = 8 * sizeof(uint32_t),
  198. .event_interrupt_class = &event_interrupt_class_v9,
  199. .num_of_watch_points = 4,
  200. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  201. .supports_cwsr = true,
  202. .needs_iommu_device = false,
  203. .needs_pci_atomics = false,
  204. .num_sdma_engines = 2,
  205. .num_sdma_queues_per_engine = 2,
  206. };
  207. static const struct kfd_device_info vega10_vf_device_info = {
  208. .asic_family = CHIP_VEGA10,
  209. .max_pasid_bits = 16,
  210. .max_no_of_hqd = 24,
  211. .doorbell_size = 8,
  212. .ih_ring_entry_size = 8 * sizeof(uint32_t),
  213. .event_interrupt_class = &event_interrupt_class_v9,
  214. .num_of_watch_points = 4,
  215. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  216. .supports_cwsr = true,
  217. .needs_iommu_device = false,
  218. .needs_pci_atomics = false,
  219. .num_sdma_engines = 2,
  220. .num_sdma_queues_per_engine = 2,
  221. };
  222. static const struct kfd_device_info vega20_device_info = {
  223. .asic_family = CHIP_VEGA20,
  224. .max_pasid_bits = 16,
  225. .max_no_of_hqd = 24,
  226. .doorbell_size = 8,
  227. .ih_ring_entry_size = 8 * sizeof(uint32_t),
  228. .event_interrupt_class = &event_interrupt_class_v9,
  229. .num_of_watch_points = 4,
  230. .mqd_size_aligned = MQD_SIZE_ALIGNED,
  231. .supports_cwsr = true,
  232. .needs_iommu_device = false,
  233. .needs_pci_atomics = false,
  234. .num_sdma_engines = 2,
  235. .num_sdma_queues_per_engine = 8,
  236. };
/* One entry of the supported_devices table: device ID -> device info. */
struct kfd_deviceid {
	/* Device ID; lookup_device_info() matches it against pdev->device */
	unsigned short did;
	/* Static properties shared by all devices of the same kind */
	const struct kfd_device_info *device_info;
};
  241. static const struct kfd_deviceid supported_devices[] = {
  242. #ifdef KFD_SUPPORT_IOMMU_V2
  243. { 0x1304, &kaveri_device_info }, /* Kaveri */
  244. { 0x1305, &kaveri_device_info }, /* Kaveri */
  245. { 0x1306, &kaveri_device_info }, /* Kaveri */
  246. { 0x1307, &kaveri_device_info }, /* Kaveri */
  247. { 0x1309, &kaveri_device_info }, /* Kaveri */
  248. { 0x130A, &kaveri_device_info }, /* Kaveri */
  249. { 0x130B, &kaveri_device_info }, /* Kaveri */
  250. { 0x130C, &kaveri_device_info }, /* Kaveri */
  251. { 0x130D, &kaveri_device_info }, /* Kaveri */
  252. { 0x130E, &kaveri_device_info }, /* Kaveri */
  253. { 0x130F, &kaveri_device_info }, /* Kaveri */
  254. { 0x1310, &kaveri_device_info }, /* Kaveri */
  255. { 0x1311, &kaveri_device_info }, /* Kaveri */
  256. { 0x1312, &kaveri_device_info }, /* Kaveri */
  257. { 0x1313, &kaveri_device_info }, /* Kaveri */
  258. { 0x1315, &kaveri_device_info }, /* Kaveri */
  259. { 0x1316, &kaveri_device_info }, /* Kaveri */
  260. { 0x1317, &kaveri_device_info }, /* Kaveri */
  261. { 0x1318, &kaveri_device_info }, /* Kaveri */
  262. { 0x131B, &kaveri_device_info }, /* Kaveri */
  263. { 0x131C, &kaveri_device_info }, /* Kaveri */
  264. { 0x131D, &kaveri_device_info }, /* Kaveri */
  265. { 0x9870, &carrizo_device_info }, /* Carrizo */
  266. { 0x9874, &carrizo_device_info }, /* Carrizo */
  267. { 0x9875, &carrizo_device_info }, /* Carrizo */
  268. { 0x9876, &carrizo_device_info }, /* Carrizo */
  269. { 0x9877, &carrizo_device_info }, /* Carrizo */
  270. { 0x15DD, &raven_device_info }, /* Raven */
  271. #endif
  272. { 0x67A0, &hawaii_device_info }, /* Hawaii */
  273. { 0x67A1, &hawaii_device_info }, /* Hawaii */
  274. { 0x67A2, &hawaii_device_info }, /* Hawaii */
  275. { 0x67A8, &hawaii_device_info }, /* Hawaii */
  276. { 0x67A9, &hawaii_device_info }, /* Hawaii */
  277. { 0x67AA, &hawaii_device_info }, /* Hawaii */
  278. { 0x67B0, &hawaii_device_info }, /* Hawaii */
  279. { 0x67B1, &hawaii_device_info }, /* Hawaii */
  280. { 0x67B8, &hawaii_device_info }, /* Hawaii */
  281. { 0x67B9, &hawaii_device_info }, /* Hawaii */
  282. { 0x67BA, &hawaii_device_info }, /* Hawaii */
  283. { 0x67BE, &hawaii_device_info }, /* Hawaii */
  284. { 0x6920, &tonga_device_info }, /* Tonga */
  285. { 0x6921, &tonga_device_info }, /* Tonga */
  286. { 0x6928, &tonga_device_info }, /* Tonga */
  287. { 0x6929, &tonga_device_info }, /* Tonga */
  288. { 0x692B, &tonga_device_info }, /* Tonga */
  289. { 0x6938, &tonga_device_info }, /* Tonga */
  290. { 0x6939, &tonga_device_info }, /* Tonga */
  291. { 0x7300, &fiji_device_info }, /* Fiji */
  292. { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/
  293. { 0x67C0, &polaris10_device_info }, /* Polaris10 */
  294. { 0x67C1, &polaris10_device_info }, /* Polaris10 */
  295. { 0x67C2, &polaris10_device_info }, /* Polaris10 */
  296. { 0x67C4, &polaris10_device_info }, /* Polaris10 */
  297. { 0x67C7, &polaris10_device_info }, /* Polaris10 */
  298. { 0x67C8, &polaris10_device_info }, /* Polaris10 */
  299. { 0x67C9, &polaris10_device_info }, /* Polaris10 */
  300. { 0x67CA, &polaris10_device_info }, /* Polaris10 */
  301. { 0x67CC, &polaris10_device_info }, /* Polaris10 */
  302. { 0x67CF, &polaris10_device_info }, /* Polaris10 */
  303. { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/
  304. { 0x67DF, &polaris10_device_info }, /* Polaris10 */
  305. { 0x67E0, &polaris11_device_info }, /* Polaris11 */
  306. { 0x67E1, &polaris11_device_info }, /* Polaris11 */
  307. { 0x67E3, &polaris11_device_info }, /* Polaris11 */
  308. { 0x67E7, &polaris11_device_info }, /* Polaris11 */
  309. { 0x67E8, &polaris11_device_info }, /* Polaris11 */
  310. { 0x67E9, &polaris11_device_info }, /* Polaris11 */
  311. { 0x67EB, &polaris11_device_info }, /* Polaris11 */
  312. { 0x67EF, &polaris11_device_info }, /* Polaris11 */
  313. { 0x67FF, &polaris11_device_info }, /* Polaris11 */
  314. { 0x6860, &vega10_device_info }, /* Vega10 */
  315. { 0x6861, &vega10_device_info }, /* Vega10 */
  316. { 0x6862, &vega10_device_info }, /* Vega10 */
  317. { 0x6863, &vega10_device_info }, /* Vega10 */
  318. { 0x6864, &vega10_device_info }, /* Vega10 */
  319. { 0x6867, &vega10_device_info }, /* Vega10 */
  320. { 0x6868, &vega10_device_info }, /* Vega10 */
  321. { 0x6869, &vega10_device_info }, /* Vega10 */
  322. { 0x686A, &vega10_device_info }, /* Vega10 */
  323. { 0x686B, &vega10_device_info }, /* Vega10 */
  324. { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
  325. { 0x686D, &vega10_device_info }, /* Vega10 */
  326. { 0x686E, &vega10_device_info }, /* Vega10 */
  327. { 0x686F, &vega10_device_info }, /* Vega10 */
  328. { 0x687F, &vega10_device_info }, /* Vega10 */
  329. { 0x66a0, &vega20_device_info }, /* Vega20 */
  330. { 0x66a1, &vega20_device_info }, /* Vega20 */
  331. { 0x66a2, &vega20_device_info }, /* Vega20 */
  332. { 0x66a3, &vega20_device_info }, /* Vega20 */
  333. { 0x66a4, &vega20_device_info }, /* Vega20 */
  334. { 0x66a7, &vega20_device_info }, /* Vega20 */
  335. { 0x66af, &vega20_device_info } /* Vega20 */
  336. };
/*
 * Forward declarations for static helpers that are defined further down
 * in this file but called earlier (from kgd2kfd_device_init() and the
 * exit/reset/resume entry points).
 */
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
  341. static const struct kfd_device_info *lookup_device_info(unsigned short did)
  342. {
  343. size_t i;
  344. for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
  345. if (supported_devices[i].did == did) {
  346. WARN_ON(!supported_devices[i].device_info);
  347. return supported_devices[i].device_info;
  348. }
  349. }
  350. dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
  351. did);
  352. return NULL;
  353. }
  354. struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
  355. struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
  356. {
  357. struct kfd_dev *kfd;
  358. int ret;
  359. const struct kfd_device_info *device_info =
  360. lookup_device_info(pdev->device);
  361. if (!device_info) {
  362. dev_err(kfd_device, "kgd2kfd_probe failed\n");
  363. return NULL;
  364. }
  365. kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
  366. if (!kfd)
  367. return NULL;
  368. /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
  369. * 32 and 64-bit requests are possible and must be
  370. * supported.
  371. */
  372. ret = pci_enable_atomic_ops_to_root(pdev,
  373. PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
  374. PCI_EXP_DEVCAP2_ATOMIC_COMP64);
  375. if (device_info->needs_pci_atomics && ret < 0) {
  376. dev_info(kfd_device,
  377. "skipped device %x:%x, PCI rejects atomics\n",
  378. pdev->vendor, pdev->device);
  379. kfree(kfd);
  380. return NULL;
  381. } else if (!ret)
  382. kfd->pci_atomic_requested = true;
  383. kfd->kgd = kgd;
  384. kfd->device_info = device_info;
  385. kfd->pdev = pdev;
  386. kfd->init_complete = false;
  387. kfd->kfd2kgd = f2g;
  388. mutex_init(&kfd->doorbell_mutex);
  389. memset(&kfd->doorbell_available_index, 0,
  390. sizeof(kfd->doorbell_available_index));
  391. return kfd;
  392. }
  393. static void kfd_cwsr_init(struct kfd_dev *kfd)
  394. {
  395. if (cwsr_enable && kfd->device_info->supports_cwsr) {
  396. if (kfd->device_info->asic_family < CHIP_VEGA10) {
  397. BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
  398. kfd->cwsr_isa = cwsr_trap_gfx8_hex;
  399. kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
  400. } else {
  401. BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
  402. kfd->cwsr_isa = cwsr_trap_gfx9_hex;
  403. kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
  404. }
  405. kfd->cwsr_enabled = true;
  406. }
  407. }
  408. bool kgd2kfd_device_init(struct kfd_dev *kfd,
  409. const struct kgd2kfd_shared_resources *gpu_resources)
  410. {
  411. unsigned int size;
  412. kfd->mec_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
  413. KGD_ENGINE_MEC1);
  414. kfd->sdma_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
  415. KGD_ENGINE_SDMA1);
  416. kfd->shared_resources = *gpu_resources;
  417. kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
  418. kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
  419. kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
  420. - kfd->vm_info.first_vmid_kfd + 1;
  421. /* Verify module parameters regarding mapped process number*/
  422. if ((hws_max_conc_proc < 0)
  423. || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
  424. dev_err(kfd_device,
  425. "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
  426. hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
  427. kfd->vm_info.vmid_num_kfd);
  428. kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
  429. } else
  430. kfd->max_proc_per_quantum = hws_max_conc_proc;
  431. /* calculate max size of mqds needed for queues */
  432. size = max_num_of_queues_per_device *
  433. kfd->device_info->mqd_size_aligned;
  434. /*
  435. * calculate max size of runlist packet.
  436. * There can be only 2 packets at once
  437. */
  438. size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
  439. max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
  440. + sizeof(struct pm4_mes_runlist)) * 2;
  441. /* Add size of HIQ & DIQ */
  442. size += KFD_KERNEL_QUEUE_SIZE * 2;
  443. /* add another 512KB for all other allocations on gart (HPD, fences) */
  444. size += 512 * 1024;
  445. if (kfd->kfd2kgd->init_gtt_mem_allocation(
  446. kfd->kgd, size, &kfd->gtt_mem,
  447. &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
  448. false)) {
  449. dev_err(kfd_device, "Could not allocate %d bytes\n", size);
  450. goto out;
  451. }
  452. dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
  453. /* Initialize GTT sa with 512 byte chunk size */
  454. if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
  455. dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
  456. goto kfd_gtt_sa_init_error;
  457. }
  458. if (kfd_doorbell_init(kfd)) {
  459. dev_err(kfd_device,
  460. "Error initializing doorbell aperture\n");
  461. goto kfd_doorbell_error;
  462. }
  463. if (kfd->kfd2kgd->get_hive_id)
  464. kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
  465. if (kfd_topology_add_device(kfd)) {
  466. dev_err(kfd_device, "Error adding device to topology\n");
  467. goto kfd_topology_add_device_error;
  468. }
  469. if (kfd_interrupt_init(kfd)) {
  470. dev_err(kfd_device, "Error initializing interrupts\n");
  471. goto kfd_interrupt_error;
  472. }
  473. kfd->dqm = device_queue_manager_init(kfd);
  474. if (!kfd->dqm) {
  475. dev_err(kfd_device, "Error initializing queue manager\n");
  476. goto device_queue_manager_error;
  477. }
  478. if (kfd_iommu_device_init(kfd)) {
  479. dev_err(kfd_device, "Error initializing iommuv2\n");
  480. goto device_iommu_error;
  481. }
  482. kfd_cwsr_init(kfd);
  483. if (kfd_resume(kfd))
  484. goto kfd_resume_error;
  485. kfd->dbgmgr = NULL;
  486. kfd->init_complete = true;
  487. dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
  488. kfd->pdev->device);
  489. pr_debug("Starting kfd with the following scheduling policy %d\n",
  490. kfd->dqm->sched_policy);
  491. goto out;
  492. kfd_resume_error:
  493. device_iommu_error:
  494. device_queue_manager_uninit(kfd->dqm);
  495. device_queue_manager_error:
  496. kfd_interrupt_exit(kfd);
  497. kfd_interrupt_error:
  498. kfd_topology_remove_device(kfd);
  499. kfd_topology_add_device_error:
  500. kfd_doorbell_fini(kfd);
  501. kfd_doorbell_error:
  502. kfd_gtt_sa_fini(kfd);
  503. kfd_gtt_sa_init_error:
  504. kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
  505. dev_err(kfd_device,
  506. "device %x:%x NOT added due to errors\n",
  507. kfd->pdev->vendor, kfd->pdev->device);
  508. out:
  509. return kfd->init_complete;
  510. }
  511. void kgd2kfd_device_exit(struct kfd_dev *kfd)
  512. {
  513. if (kfd->init_complete) {
  514. kgd2kfd_suspend(kfd);
  515. device_queue_manager_uninit(kfd->dqm);
  516. kfd_interrupt_exit(kfd);
  517. kfd_topology_remove_device(kfd);
  518. kfd_doorbell_fini(kfd);
  519. kfd_gtt_sa_fini(kfd);
  520. kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
  521. }
  522. kfree(kfd);
  523. }
  524. int kgd2kfd_pre_reset(struct kfd_dev *kfd)
  525. {
  526. if (!kfd->init_complete)
  527. return 0;
  528. kgd2kfd_suspend(kfd);
  529. /* hold dqm->lock to prevent further execution*/
  530. dqm_lock(kfd->dqm);
  531. kfd_signal_reset_event(kfd);
  532. return 0;
  533. }
  534. /*
  535. * Fix me. KFD won't be able to resume existing process for now.
  536. * We will keep all existing process in a evicted state and
  537. * wait the process to be terminated.
  538. */
  539. int kgd2kfd_post_reset(struct kfd_dev *kfd)
  540. {
  541. int ret, count;
  542. if (!kfd->init_complete)
  543. return 0;
  544. dqm_unlock(kfd->dqm);
  545. ret = kfd_resume(kfd);
  546. if (ret)
  547. return ret;
  548. count = atomic_dec_return(&kfd_locked);
  549. WARN_ONCE(count != 0, "KFD reset ref. error");
  550. return 0;
  551. }
  552. bool kfd_is_locked(void)
  553. {
  554. return (atomic_read(&kfd_locked) > 0);
  555. }
  556. void kgd2kfd_suspend(struct kfd_dev *kfd)
  557. {
  558. if (!kfd->init_complete)
  559. return;
  560. /* For first KFD device suspend all the KFD processes */
  561. if (atomic_inc_return(&kfd_locked) == 1)
  562. kfd_suspend_all_processes();
  563. kfd->dqm->ops.stop(kfd->dqm);
  564. kfd_iommu_suspend(kfd);
  565. }
  566. int kgd2kfd_resume(struct kfd_dev *kfd)
  567. {
  568. int ret, count;
  569. if (!kfd->init_complete)
  570. return 0;
  571. ret = kfd_resume(kfd);
  572. if (ret)
  573. return ret;
  574. count = atomic_dec_return(&kfd_locked);
  575. WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
  576. if (count == 0)
  577. ret = kfd_resume_all_processes();
  578. return ret;
  579. }
  580. static int kfd_resume(struct kfd_dev *kfd)
  581. {
  582. int err = 0;
  583. err = kfd_iommu_resume(kfd);
  584. if (err) {
  585. dev_err(kfd_device,
  586. "Failed to resume IOMMU for device %x:%x\n",
  587. kfd->pdev->vendor, kfd->pdev->device);
  588. return err;
  589. }
  590. err = kfd->dqm->ops.start(kfd->dqm);
  591. if (err) {
  592. dev_err(kfd_device,
  593. "Error starting queue manager for device %x:%x\n",
  594. kfd->pdev->vendor, kfd->pdev->device);
  595. goto dqm_start_error;
  596. }
  597. return err;
  598. dqm_start_error:
  599. kfd_iommu_suspend(kfd);
  600. return err;
  601. }
  602. /* This is called directly from KGD at ISR. */
  603. void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
  604. {
  605. uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
  606. bool is_patched = false;
  607. if (!kfd->init_complete)
  608. return;
  609. if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
  610. dev_err_once(kfd_device, "Ring entry too small\n");
  611. return;
  612. }
  613. spin_lock(&kfd->interrupt_lock);
  614. if (kfd->interrupts_active
  615. && interrupt_is_wanted(kfd, ih_ring_entry,
  616. patched_ihre, &is_patched)
  617. && enqueue_ih_ring_entry(kfd,
  618. is_patched ? patched_ihre : ih_ring_entry))
  619. queue_work(kfd->ih_wq, &kfd->interrupt_work);
  620. spin_unlock(&kfd->interrupt_lock);
  621. }
  622. int kgd2kfd_quiesce_mm(struct mm_struct *mm)
  623. {
  624. struct kfd_process *p;
  625. int r;
  626. /* Because we are called from arbitrary context (workqueue) as opposed
  627. * to process context, kfd_process could attempt to exit while we are
  628. * running so the lookup function increments the process ref count.
  629. */
  630. p = kfd_lookup_process_by_mm(mm);
  631. if (!p)
  632. return -ESRCH;
  633. r = kfd_process_evict_queues(p);
  634. kfd_unref_process(p);
  635. return r;
  636. }
  637. int kgd2kfd_resume_mm(struct mm_struct *mm)
  638. {
  639. struct kfd_process *p;
  640. int r;
  641. /* Because we are called from arbitrary context (workqueue) as opposed
  642. * to process context, kfd_process could attempt to exit while we are
  643. * running so the lookup function increments the process ref count.
  644. */
  645. p = kfd_lookup_process_by_mm(mm);
  646. if (!p)
  647. return -ESRCH;
  648. r = kfd_process_restore_queues(p);
  649. kfd_unref_process(p);
  650. return r;
  651. }
  652. /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
  653. * prepare for safe eviction of KFD BOs that belong to the specified
  654. * process.
  655. *
  656. * @mm: mm_struct that identifies the specified KFD process
  657. * @fence: eviction fence attached to KFD process BOs
  658. *
  659. */
  660. int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
  661. struct dma_fence *fence)
  662. {
  663. struct kfd_process *p;
  664. unsigned long active_time;
  665. unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
  666. if (!fence)
  667. return -EINVAL;
  668. if (dma_fence_is_signaled(fence))
  669. return 0;
  670. p = kfd_lookup_process_by_mm(mm);
  671. if (!p)
  672. return -ENODEV;
  673. if (fence->seqno == p->last_eviction_seqno)
  674. goto out;
  675. p->last_eviction_seqno = fence->seqno;
  676. /* Avoid KFD process starvation. Wait for at least
  677. * PROCESS_ACTIVE_TIME_MS before evicting the process again
  678. */
  679. active_time = get_jiffies_64() - p->last_restore_timestamp;
  680. if (delay_jiffies > active_time)
  681. delay_jiffies -= active_time;
  682. else
  683. delay_jiffies = 0;
  684. /* During process initialization eviction_work.dwork is initialized
  685. * to kfd_evict_bo_worker
  686. */
  687. schedule_delayed_work(&p->eviction_work, delay_jiffies);
  688. out:
  689. kfd_unref_process(p);
  690. return 0;
  691. }
  692. static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
  693. unsigned int chunk_size)
  694. {
  695. unsigned int num_of_longs;
  696. if (WARN_ON(buf_size < chunk_size))
  697. return -EINVAL;
  698. if (WARN_ON(buf_size == 0))
  699. return -EINVAL;
  700. if (WARN_ON(chunk_size == 0))
  701. return -EINVAL;
  702. kfd->gtt_sa_chunk_size = chunk_size;
  703. kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
  704. num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
  705. BITS_PER_LONG;
  706. kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
  707. if (!kfd->gtt_sa_bitmap)
  708. return -ENOMEM;
  709. pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
  710. kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
  711. mutex_init(&kfd->gtt_sa_lock);
  712. return 0;
  713. }
  714. static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
  715. {
  716. mutex_destroy(&kfd->gtt_sa_lock);
  717. kfree(kfd->gtt_sa_bitmap);
  718. }
  719. static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
  720. unsigned int bit_num,
  721. unsigned int chunk_size)
  722. {
  723. return start_addr + bit_num * chunk_size;
  724. }
  725. static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
  726. unsigned int bit_num,
  727. unsigned int chunk_size)
  728. {
  729. return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
  730. }
  731. int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
  732. struct kfd_mem_obj **mem_obj)
  733. {
  734. unsigned int found, start_search, cur_size;
  735. if (size == 0)
  736. return -EINVAL;
  737. if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
  738. return -ENOMEM;
  739. *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
  740. if (!(*mem_obj))
  741. return -ENOMEM;
  742. pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
  743. start_search = 0;
  744. mutex_lock(&kfd->gtt_sa_lock);
  745. kfd_gtt_restart_search:
  746. /* Find the first chunk that is free */
  747. found = find_next_zero_bit(kfd->gtt_sa_bitmap,
  748. kfd->gtt_sa_num_of_chunks,
  749. start_search);
  750. pr_debug("Found = %d\n", found);
  751. /* If there wasn't any free chunk, bail out */
  752. if (found == kfd->gtt_sa_num_of_chunks)
  753. goto kfd_gtt_no_free_chunk;
  754. /* Update fields of mem_obj */
  755. (*mem_obj)->range_start = found;
  756. (*mem_obj)->range_end = found;
  757. (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
  758. kfd->gtt_start_gpu_addr,
  759. found,
  760. kfd->gtt_sa_chunk_size);
  761. (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
  762. kfd->gtt_start_cpu_ptr,
  763. found,
  764. kfd->gtt_sa_chunk_size);
  765. pr_debug("gpu_addr = %p, cpu_addr = %p\n",
  766. (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
  767. /* If we need only one chunk, mark it as allocated and get out */
  768. if (size <= kfd->gtt_sa_chunk_size) {
  769. pr_debug("Single bit\n");
  770. set_bit(found, kfd->gtt_sa_bitmap);
  771. goto kfd_gtt_out;
  772. }
  773. /* Otherwise, try to see if we have enough contiguous chunks */
  774. cur_size = size - kfd->gtt_sa_chunk_size;
  775. do {
  776. (*mem_obj)->range_end =
  777. find_next_zero_bit(kfd->gtt_sa_bitmap,
  778. kfd->gtt_sa_num_of_chunks, ++found);
  779. /*
  780. * If next free chunk is not contiguous than we need to
  781. * restart our search from the last free chunk we found (which
  782. * wasn't contiguous to the previous ones
  783. */
  784. if ((*mem_obj)->range_end != found) {
  785. start_search = found;
  786. goto kfd_gtt_restart_search;
  787. }
  788. /*
  789. * If we reached end of buffer, bail out with error
  790. */
  791. if (found == kfd->gtt_sa_num_of_chunks)
  792. goto kfd_gtt_no_free_chunk;
  793. /* Check if we don't need another chunk */
  794. if (cur_size <= kfd->gtt_sa_chunk_size)
  795. cur_size = 0;
  796. else
  797. cur_size -= kfd->gtt_sa_chunk_size;
  798. } while (cur_size > 0);
  799. pr_debug("range_start = %d, range_end = %d\n",
  800. (*mem_obj)->range_start, (*mem_obj)->range_end);
  801. /* Mark the chunks as allocated */
  802. for (found = (*mem_obj)->range_start;
  803. found <= (*mem_obj)->range_end;
  804. found++)
  805. set_bit(found, kfd->gtt_sa_bitmap);
  806. kfd_gtt_out:
  807. mutex_unlock(&kfd->gtt_sa_lock);
  808. return 0;
  809. kfd_gtt_no_free_chunk:
  810. pr_debug("Allocation failed with mem_obj = %p\n", mem_obj);
  811. mutex_unlock(&kfd->gtt_sa_lock);
  812. kfree(mem_obj);
  813. return -ENOMEM;
  814. }
  815. int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
  816. {
  817. unsigned int bit;
  818. /* Act like kfree when trying to free a NULL object */
  819. if (!mem_obj)
  820. return 0;
  821. pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
  822. mem_obj, mem_obj->range_start, mem_obj->range_end);
  823. mutex_lock(&kfd->gtt_sa_lock);
  824. /* Mark the chunks as free */
  825. for (bit = mem_obj->range_start;
  826. bit <= mem_obj->range_end;
  827. bit++)
  828. clear_bit(bit, kfd->gtt_sa_bitmap);
  829. mutex_unlock(&kfd->gtt_sa_lock);
  830. kfree(mem_obj);
  831. return 0;
  832. }
  833. #if defined(CONFIG_DEBUG_FS)
  834. /* This function will send a package to HIQ to hang the HWS
  835. * which will trigger a GPU reset and bring the HWS back to normal state
  836. */
  837. int kfd_debugfs_hang_hws(struct kfd_dev *dev)
  838. {
  839. int r = 0;
  840. if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
  841. pr_err("HWS is not enabled");
  842. return -EINVAL;
  843. }
  844. r = pm_debugfs_hang_hws(&dev->dqm->packets);
  845. if (!r)
  846. r = dqm_debugfs_execute_queues(dev->dqm);
  847. return r;
  848. }
  849. #endif