kfd_chardev.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}

static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

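/*
 * Validate the queue-creation arguments copied from user space and fill in
 * the corresponding fields of @q_properties. User pointers (ring base,
 * read/write pointers, EOP buffer, context save/restore area) are only
 * sanity-checked with access_ok() here. Returns 0 on success or a negative
 * errno (-EINVAL, -EFAULT, or -ENOTSUPP for an unknown queue type).
 */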
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %p, %p\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

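/*
 * Handler for AMDKFD_IOC_CREATE_QUEUE: validates the user arguments, binds
 * the process to the target device, and creates the queue through the
 * process queue manager (pqm_create_queue). On success the new queue id is
 * returned in args->queue_id, and the doorbell mmap offset (the gpu_id
 * tagged with KFD_MMAP_DOORBELL_MASK, shifted by PAGE_SHIFT) is returned in
 * args->doorbell_offset.
 */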
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

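/*
 * Handler for AMDKFD_IOC_SET_MEMORY_POLICY: translates the coherent /
 * non-coherent cache policy selections from the ioctl arguments and applies
 * them, together with the alternate aperture range, through the device
 * queue manager's set_cache_memory_policy() hook.
 */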
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		 ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

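/*
 * Handler for AMDKFD_IOC_DBG_REGISTER: creates a debug manager for the
 * device (if none exists yet) and registers the calling process with it.
 * Not supported on Carrizo. Both the process mutex and the global dbgmgr
 * mutex are held while the registration is performed.
 */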
static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have pdd, if this is the first queue created for
	 * this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much
 * smaller HW limitation, which is enforced by the dbgdev module.)
 * Note also that the watch addresses themselves are not "copied from user",
 * since they are set into the HW with user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}

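/*
 * Handler for AMDKFD_IOC_GET_CLOCK_COUNTERS: returns the GPU clock counter
 * read through the KGD interface (or 0 for a node without a GPU), plus CPU
 * and system counters sampled from the raw monotonic and boottime clocks in
 * nanoseconds, which is why the reported system clock frequency is a fixed
 * 1 GHz.
 */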
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct timespec64 time;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}

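/*
 * Handler for the original AMDKFD_IOC_GET_PROCESS_APERTURES ioctl: copies
 * the LDS, GPUVM and scratch aperture ranges of every device bound to the
 * process into the fixed-size array embedded in the ioctl arguments, up to
 * NUM_OF_SUPPORTED_GPUS entries.
 */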
static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/*if the process-device list isn't empty*/
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

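/*
 * Handler for AMDKFD_IOC_GET_PROCESS_APERTURES_NEW: a two-step variant of
 * the aperture query. When called with num_of_nodes == 0 it only reports
 * how many nodes exist, so user space can size its buffer; otherwise it
 * fills a temporary array with per-device apertures and copies it to the
 * user-provided pointer.
 */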
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

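/*
 * Handler for AMDKFD_IOC_CREATE_EVENT. On dGPUs the signal (event) page is
 * allocated by user mode and handed over on the first call via
 * event_page_offset, where it is translated from the per-process BO handle,
 * mapped into the kernel and installed with kfd_event_page_set() before the
 * event itself is created.
 */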
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

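/*
 * Handler for AMDKFD_IOC_SET_SCRATCH_BACKING_VA: records the scratch
 * backing virtual address in the per-device queue data. Without HW
 * scheduling (KFD_SCHED_POLICY_NO_HWS) and with a VMID already assigned,
 * the address is also programmed into the hardware immediately through the
 * KGD interface.
 */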
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

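/*
 * Handler for AMDKFD_IOC_ACQUIRE_VM: takes a reference on the DRM file
 * descriptor passed by user space and uses it to initialize the
 * per-process GPU VM for this device. The drm_file reference is kept by
 * the process device data on success and dropped on any failure.
 */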
static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

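/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-visible (no private VRAM, some public VRAM), which is what allows
 * host-visible VRAM allocations below. The debug_largebar option forces
 * this behaviour for testing, and devices that rely on an IOMMU (APUs)
 * are never large BAR.
 */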
static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

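/*
 * Handler for AMDKFD_IOC_ALLOC_MEMORY_OF_GPU: allocates GPU-accessible
 * memory through the KGD interface and stores the resulting kgd_mem object
 * in the per-process IDR, returning a handle that encodes both the gpu_id
 * and the IDR slot (MAKE_HANDLE) together with the mmap offset.
 */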
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

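/*
 * Handler for AMDKFD_IOC_MAP_MEMORY_TO_GPU: maps a previously allocated
 * buffer into the GPU VMs of every device listed in the user-supplied
 * device ID array, resuming at args->n_success so a partially completed
 * call can be retried. After the mappings are submitted it waits for the
 * page-table updates (sync_memory) and flushes the TLBs of all target
 * devices.
 */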
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
			      GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

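/*
 * Handler for AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU: the inverse of the map
 * ioctl. The buffer handle is translated back to the kgd_mem object and
 * unmapped from the GPU VM of every device in the user-supplied array,
 * again using args->n_success to allow restarting a partially completed
 * request.
 */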
static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
			      GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

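/*
 * Common ioctl dispatcher. The command number indexes the amdkfd_ioctls
 * table above; the argument structure is copied into a small on-stack
 * buffer (or a kmalloc'd one if it is larger than 128 bytes), zero-padded
 * up to the size the kernel-side definition expects, passed to the handler,
 * and copied back to user space for commands that return data.
 */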
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			  task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

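/*
 * mmap handler for /dev/kfd. The high bits of the page offset select what
 * is being mapped: doorbell pages, the event page, or reserved memory. The
 * corresponding mask is cleared from vm_pgoff before handing the VMA to the
 * matching helper.
 */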
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
			KFD_MMAP_DOORBELL_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
		return kfd_doorbell_mmap(process, vma);
	} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
			KFD_MMAP_EVENTS_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
		return kfd_event_mmap(process, vma);
	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
			KFD_MMAP_RESERVED_MEM_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
		return kfd_reserved_mem_mmap(process, vma);
	}

	return -EFAULT;
}