kfd_chardev.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}
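
/*
 * Device open: only minor 0 is valid and 32-bit clients are rejected;
 * otherwise create (or look up) the per-process KFD state for the caller.
 */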
static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked())
		return -EAGAIN;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}
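
/*
 * Validate the user-supplied queue-creation arguments (percentage,
 * priority, ring/EOP/ctx-save addresses and sizes) and translate them
 * into a kernel struct queue_properties.
 */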
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);
	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);
	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);
	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);
	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);
	pr_debug("Queue Format: %d\n", q_properties->format);
	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}
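
/*
 * AMDKFD_IOC_CREATE_QUEUE: bind the process to the target device and
 * create a compute or SDMA user queue. The returned doorbell_offset
 * encodes the mmap type and GPU ID for a later mmap() of the doorbell.
 */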
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);
	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);
	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);
	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
			args->queue_id,
			p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}
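
/*
 * AMDKFD_IOC_SET_CU_MASK: copy a per-queue compute-unit mask from user
 * space (capped at max_num_cus bits) and apply it through the process
 * queue manager.
 */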
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
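
/*
 * Debugger ioctls: register/unregister a debugger on a device and
 * forward address-watch and wave-control requests to the dbgmgr.
 * All of these are rejected on Carrizo (CZ), where they are not
 * supported.
 */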
static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have a pdd, in case this is the first queue
	 * created for this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much
 * smaller HW limitation, which is enforced by the dbgdev module.)
 * Note also that the watch addresses themselves are not "copied from
 * user", since they are written into the HW as user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */
	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate a fixed-size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
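
/*
 * AMDKFD_IOC_GET_CLOCK_COUNTERS: report the GPU clock counter (when a
 * GPU is present), raw monotonic and boot-based CPU timestamps, and a
 * fixed 1 GHz system clock frequency (the CPU counters are nanoseconds).
 */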
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}

static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}
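
/*
 * Newer variant of the apertures query: when called with num_of_nodes
 * == 0 it only reports how many nodes exist so user space can size its
 * buffer; otherwise it copies per-device aperture info to user memory.
 */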
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}
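
/*
 * AMDKFD_IOC_CREATE_EVENT: on dGPUs the signal page is allocated in
 * user mode and handed to KFD via event_page_offset on the first call;
 * it is mapped into the kernel before the event itself is created.
 */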
static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}
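
/*
 * A device is treated as "large BAR" when all of its local memory is
 * CPU-visible (no private VRAM). The debug_largebar module option can
 * force this on small-BAR systems for testing.
 */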
bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}
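
/*
 * AMDKFD_IOC_ALLOC_MEMORY_OF_GPU: allocate GPU-accessible memory for
 * the process on one device and return an opaque handle (GPU ID plus
 * IDR handle) together with the mmap offset for CPU access.
 */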
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}
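
/*
 * AMDKFD_IOC_MAP_MEMORY_TO_GPU: map a previously allocated buffer into
 * the GPU VMs of all devices listed by the caller, resuming at
 * n_success so a partially completed call can be retried, then flush
 * the TLBs once the page-table updates have completed.
 */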
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}
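
/*
 * AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU: reverse of the map ioctl; unmaps
 * the buffer from each listed device, again using n_success to resume
 * partially completed requests.
 */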
static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}
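
/*
 * Describe one ioctl: the table below is indexed by _IOC_NR(cmd), so a
 * command number maps directly to its handler in kfd_ioctl().
 */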
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0)
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
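
/*
 * Common ioctl dispatcher: look up the handler by command number, copy
 * the argument struct into a kernel buffer (on the stack when it fits),
 * call the handler, and copy any output back to user space.
 */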
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			  task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}
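
/*
 * mmap handler: the upper bits of the offset encode the mapping type
 * (doorbells, event page or reserved memory) and the GPU ID; dispatch
 * to the matching per-type mmap helper.
 */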
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}