kfd_chardev.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}
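
/*
 * /dev/kfd open handler: rejects minors other than 0 and 32-bit (compat)
 * callers, then creates or looks up the kfd_process for the calling task.
 */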
static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}
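
/*
 * Validate the user-supplied queue arguments (percentage, priority, ring
 * size and the user-space pointers) and translate them into a
 * struct queue_properties understood by the queue manager.
 */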
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}
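
/*
 * AMDKFD_IOC_CREATE_QUEUE handler: binds the process to the requested GPU,
 * creates a user-mode queue through the process queue manager and returns
 * the queue id plus an encoded doorbell offset for a later mmap().
 */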
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}
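
/*
 * AMDKFD_IOC_UPDATE_QUEUE handler: revalidates the ring address, size,
 * percentage and priority, then asks the process queue manager to apply
 * the new properties to an existing queue.
 */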
static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}
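
/*
 * AMDKFD_IOC_SET_MEMORY_POLICY handler: configures the default and
 * alternate cache policies (coherent vs. non-coherent) and the alternate
 * aperture for the process on the given device.
 */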
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
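
/*
 * AMDKFD_IOC_DBG_REGISTER handler: attaches the in-kernel debug manager
 * (kfd_dbgmgr) to this process on the given device. Only one debugger can
 * be registered per device at a time, and Carrizo is not supported.
 */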
static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have a pdd, in case this is the first queue
	 * created for this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much
 * smaller HW limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not "copied
 * from user", since they are set into the HW with user mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */
	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
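
/*
 * AMDKFD_IOC_GET_CLOCK_COUNTERS handler: returns the GPU clock counter
 * (read through the KGD interface), the CPU raw monotonic time and the
 * monotonic boottime, all in nanoseconds (hence the reported 1 GHz
 * system clock frequency).
 */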
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct timespec64 time;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}

static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}
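
/*
 * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW handler: like the legacy ioctl but
 * not capped at NUM_OF_SUPPORTED_GPUS. A first call with num_of_nodes == 0
 * reports how many nodes exist; a second call copies the per-device
 * aperture information into a user-allocated array.
 */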
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
				args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}
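
/*
 * AMDKFD_IOC_CREATE_EVENT handler: optionally registers a user-allocated
 * event page (the dGPU case, identified via event_page_offset) and then
 * creates a new event, returning its id, trigger data and slot index.
 */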
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}
		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}
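
/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-visible (local_mem_size_private == 0), or when debug_largebar
 * forces it. Devices that need the IOMMU device are never large BAR.
 */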
static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}
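
/*
 * AMDKFD_IOC_ALLOC_MEMORY_OF_GPU handler: allocates GPU memory through the
 * KGD interface, registers the buffer in the process-device IDR and returns
 * a handle encoding the gpu_id and IDR index plus the mmap offset.
 * Host-visible VRAM is refused on small-BAR devices.
 */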
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}
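
/*
 * AMDKFD_IOC_MAP_MEMORY_TO_GPU handler: maps a previously allocated buffer
 * into the GPU VM of every device listed in device_ids_array_ptr, waits for
 * the page-table updates to complete, then flushes the TLBs of the affected
 * devices. n_success lets user space resume a partially completed mapping.
 */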
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
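
/*
 * Common ioctl dispatcher: looks up the handler in amdkfd_ioctls[], copies
 * the argument struct in (zero-filling the tail when the kernel's
 * definition is larger than what user space passed), calls the handler and
 * copies the result back out.
 */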
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			  task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}
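
/*
 * mmap handler: decodes the mapping type and GPU id from vm_pgoff and
 * forwards the request to the doorbell, event or reserved-memory mmap
 * helpers.
 */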
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}