kvmgt.c

/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT 40
#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
        u32 type;
        u32 subtype;
        size_t size;
        u32 flags;
};

struct kvmgt_pgfn {
        gfn_t gfn;
        struct hlist_node hnode;
};

struct kvmgt_guest_info {
        struct kvm *kvm;
        struct intel_vgpu *vgpu;
        struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
        struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

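/*
 * A gvt_dma node caches one guest page that has been pinned for device
 * access: the guest frame number (gfn) and the host IOVA obtained after
 * DMA-mapping the pinned page. Nodes live in a per-vGPU rb-tree keyed by
 * gfn (vgpu->vdev.cache) and are protected by vdev.cache_lock.
 */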
struct gvt_dma {
        struct rb_node node;
        gfn_t gfn;
        unsigned long iova;
};

static inline bool handle_valid(unsigned long handle)
{
        return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

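/*
 * DMA-map a pinned guest page through the GPU's parent PCI device so the
 * graphics hardware can address it; the resulting IOVA is returned as a
 * page frame number in *iova.
 */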
static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
                unsigned long *iova)
{
        struct page *page;
        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
        dma_addr_t daddr;

        if (unlikely(!pfn_valid(pfn)))
                return -EFAULT;

        page = pfn_to_page(pfn);
        daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
                        PCI_DMA_BIDIRECTIONAL);
        if (dma_mapping_error(dev, daddr))
                return -ENOMEM;

        *iova = (unsigned long)(daddr >> PAGE_SHIFT);
        return 0;
}

static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
{
        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
        dma_addr_t daddr;

        daddr = (dma_addr_t)(iova << PAGE_SHIFT);
        dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}

static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct rb_node *node = vgpu->vdev.cache.rb_node;
        struct gvt_dma *ret = NULL;

        while (node) {
                struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

                if (gfn < itr->gfn)
                        node = node->rb_left;
                else if (gfn > itr->gfn)
                        node = node->rb_right;
                else {
                        ret = itr;
                        goto out;
                }
        }

out:
        return ret;
}

static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct gvt_dma *entry;
        unsigned long iova;

        mutex_lock(&vgpu->vdev.cache_lock);

        entry = __gvt_cache_find(vgpu, gfn);
        iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;

        mutex_unlock(&vgpu->vdev.cache_lock);
        return iova;
}

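/*
 * Insert a new gfn -> iova mapping into the rb-tree cache. If the gfn is
 * already cached, the freshly allocated node is discarded.
 */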
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
                unsigned long iova)
{
        struct gvt_dma *new, *itr;
        struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

        new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
        if (!new)
                return;

        new->gfn = gfn;
        new->iova = iova;

        mutex_lock(&vgpu->vdev.cache_lock);
        while (*link) {
                parent = *link;
                itr = rb_entry(parent, struct gvt_dma, node);

                if (gfn == itr->gfn)
                        goto out;
                else if (gfn < itr->gfn)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }

        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, &vgpu->vdev.cache);
        mutex_unlock(&vgpu->vdev.cache_lock);
        return;

out:
        mutex_unlock(&vgpu->vdev.cache_lock);
        kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
                                struct gvt_dma *entry)
{
        rb_erase(&entry->node, &vgpu->vdev.cache);
        kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
        struct device *dev = mdev_dev(vgpu->vdev.mdev);
        struct gvt_dma *this;
        unsigned long g1;
        int rc;

        mutex_lock(&vgpu->vdev.cache_lock);
        this = __gvt_cache_find(vgpu, gfn);
        if (!this) {
                mutex_unlock(&vgpu->vdev.cache_lock);
                return;
        }

        g1 = gfn;
        gvt_dma_unmap_iova(vgpu, this->iova);
        rc = vfio_unpin_pages(dev, &g1, 1);
        WARN_ON(rc != 1);
        __gvt_cache_remove_entry(vgpu, this);
        mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
        vgpu->vdev.cache = RB_ROOT;
        mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
        struct gvt_dma *dma;
        struct rb_node *node = NULL;
        struct device *dev = mdev_dev(vgpu->vdev.mdev);
        unsigned long gfn;

        mutex_lock(&vgpu->vdev.cache_lock);
        while ((node = rb_first(&vgpu->vdev.cache))) {
                dma = rb_entry(node, struct gvt_dma, node);
                gvt_dma_unmap_iova(vgpu, dma->iova);
                gfn = dma->gfn;
                vfio_unpin_pages(dev, &gfn, 1);
                __gvt_cache_remove_entry(vgpu, dma);
        }
        mutex_unlock(&vgpu->vdev.cache_lock);
}

static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
                const char *name)
{
        int i;
        struct intel_vgpu_type *t;
        const char *driver_name = dev_driver_string(
                        &gvt->dev_priv->drm.pdev->dev);

        for (i = 0; i < gvt->num_types; i++) {
                t = &gvt->types[i];
                if (!strncmp(t->name, name + strlen(driver_name) + 1,
                        sizeof(t->name)))
                        return t;
        }

        return NULL;
}

static ssize_t available_instances_show(struct kobject *kobj,
                struct device *dev, char *buf)
{
        struct intel_vgpu_type *type;
        unsigned int num = 0;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                num = 0;
        else
                num = type->avail_instance;

        return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
                char *buf)
{
        struct intel_vgpu_type *type;
        void *gvt = kdev_to_i915(dev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type)
                return 0;

        return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
                       "fence: %d\nresolution: %s\n"
                       "weight: %d\n",
                       BYTES_TO_MB(type->low_gm_size),
                       BYTES_TO_MB(type->high_gm_size),
                       type->fence, vgpu_edid_str(type->resolution),
                       type->weight);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
        &mdev_type_attr_available_instances.attr,
        &mdev_type_attr_device_api.attr,
        &mdev_type_attr_description.attr,
        NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
        [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i, j;
        struct intel_vgpu_type *type;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                type = &gvt->types[i];

                group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
                if (WARN_ON(!group))
                        goto unwind;

                group->name = type->name;
                group->attrs = type_attrs;
                intel_vgpu_type_groups[i] = group;
        }

        return true;

unwind:
        for (j = 0; j < i; j++) {
                group = intel_vgpu_type_groups[j];
                kfree(group);
        }

        return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
        int i;
        struct attribute_group *group;

        for (i = 0; i < gvt->num_types; i++) {
                group = intel_vgpu_type_groups[i];
                kfree(group);
        }
}

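/*
 * The protect table is a hash table of gfns whose backing pages are
 * currently write-protected through the KVM page-track framework. It is
 * consulted by the page-track write notifier to decide whether a guest
 * write must be emulated by GVT-g.
 */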
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
        hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
        struct kvmgt_pgfn *p;
        struct hlist_node *tmp;
        int i;

        hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p, *res = NULL;

        hash_for_each_possible(info->ptable, p, hnode, gfn) {
                if (gfn == p->gfn) {
                        res = p;
                        break;
                }
        }

        return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        if (kvmgt_gfn_is_write_protected(info, gfn))
                return;

        p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
        if (WARN(!p, "gfn: 0x%llx\n", gfn))
                return;

        p->gfn = gfn;
        hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
                                gfn_t gfn)
{
        struct kvmgt_pgfn *p;

        p = __kvmgt_protect_table_find(info, gfn);
        if (p) {
                hash_del(&p->hnode);
                kfree(p);
        }
}

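/*
 * mdev "create" callback: look up the requested vGPU type from the mdev
 * type name and ask the GVT-g core to create a matching vGPU instance,
 * which is then stored as the mdev's driver data.
 */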
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = NULL;
        struct intel_vgpu_type *type;
        struct device *pdev;
        void *gvt;
        int ret;

        pdev = mdev_parent_dev(mdev);
        gvt = kdev_to_i915(pdev)->gvt;

        type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
        if (!type) {
                gvt_vgpu_err("failed to find type %s to create\n",
                                kobject_name(kobj));
                ret = -EINVAL;
                goto out;
        }

        vgpu = intel_gvt_ops->vgpu_create(gvt, type);
        if (IS_ERR_OR_NULL(vgpu)) {
                ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
                gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
                goto out;
        }

        INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

        vgpu->vdev.mdev = mdev;
        mdev_set_drvdata(mdev, vgpu);

        gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
                     dev_name(mdev_dev(mdev)));
        ret = 0;

out:
        return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        if (handle_valid(vgpu->handle))
                return -EBUSY;

        intel_gvt_ops->vgpu_destroy(vgpu);
        return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.iommu_notifier);

        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
                struct vfio_iommu_type1_dma_unmap *unmap = data;
                unsigned long gfn, end_gfn;

                gfn = unmap->iova >> PAGE_SHIFT;
                end_gfn = gfn + unmap->size / PAGE_SIZE;

                while (gfn < end_gfn)
                        gvt_cache_remove(vgpu, gfn++);
        }

        return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct intel_vgpu *vgpu = container_of(nb,
                                        struct intel_vgpu,
                                        vdev.group_notifier);

        /* the only action we care about */
        if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
                vgpu->vdev.kvm = data;

                if (!data)
                        schedule_work(&vgpu->vdev.release_work);
        }

        return NOTIFY_OK;
}

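/*
 * mdev "open" callback: register the VFIO IOMMU and group notifiers,
 * bind the vGPU to the KVM instance delivered by the group notifier
 * (kvmgt_guest_init) and activate it. Each step is unwound on failure.
 */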
static int intel_vgpu_open(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long events;
        int ret;

        vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
        vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
                                &vgpu->vdev.iommu_notifier);
        if (ret != 0) {
                gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
                        ret);
                goto out;
        }

        events = VFIO_GROUP_NOTIFY_SET_KVM;
        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
                                &vgpu->vdev.group_notifier);
        if (ret != 0) {
                gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
                        ret);
                goto undo_iommu;
        }

        ret = kvmgt_guest_init(mdev);
        if (ret)
                goto undo_group;

        intel_gvt_ops->vgpu_activate(vgpu);

        atomic_set(&vgpu->vdev.released, 0);
        return ret;

undo_group:
        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
                                        &vgpu->vdev.group_notifier);

undo_iommu:
        vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
out:
        return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
        struct kvmgt_guest_info *info;
        int ret;

        if (!handle_valid(vgpu->handle))
                return;

        if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
                return;

        intel_gvt_ops->vgpu_deactivate(vgpu);

        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
                                        &vgpu->vdev.iommu_notifier);
        WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
                                        &vgpu->vdev.group_notifier);
        WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

        info = (struct kvmgt_guest_info *)vgpu->handle;
        kvmgt_guest_exit(info);

        vgpu->vdev.kvm = NULL;
        vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        __intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
        struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
                                        vdev.release_work);

        __intel_vgpu_release(vgpu);
}

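/*
 * Read the guest-programmed BAR0 base address from the vGPU's virtual
 * PCI configuration space, combining the high dword for 64-bit memory
 * BARs; other BAR types are treated as 32-bit.
 */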
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
        u32 start_lo, start_hi;
        u32 mem_type;
        int pos = PCI_BASE_ADDRESS_0;

        start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_MASK;
        mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
                        PCI_BASE_ADDRESS_MEM_TYPE_MASK;

        switch (mem_type) {
        case PCI_BASE_ADDRESS_MEM_TYPE_64:
                start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
                                                + pos + 4));
                break;
        case PCI_BASE_ADDRESS_MEM_TYPE_32:
        case PCI_BASE_ADDRESS_MEM_TYPE_1M:
                /* 1M mem BAR treated as 32-bit BAR */
        default:
                /* mem unknown type treated as 32-bit BAR */
                start_hi = 0;
                break;
        }

        return ((u64)start_hi << 32) | start_lo;
}

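/*
 * Common path for the mdev read/write file operations. The VFIO region
 * index and offset are decoded from *ppos; configuration space accesses
 * and BAR0/BAR1 accesses are forwarded to the GVT-g emulation handlers,
 * all other regions are rejected.
 */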
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
                        size_t count, loff_t *ppos, bool is_write)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
        uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
        int ret = -EINVAL;

        if (index >= VFIO_PCI_NUM_REGIONS) {
                gvt_vgpu_err("invalid index: %u\n", index);
                return -EINVAL;
        }

        switch (index) {
        case VFIO_PCI_CONFIG_REGION_INDEX:
                if (is_write)
                        ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
                                                buf, count);
                else
                        ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
                                                buf, count);
                break;
        case VFIO_PCI_BAR0_REGION_INDEX:
        case VFIO_PCI_BAR1_REGION_INDEX:
                if (is_write) {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_write(vgpu,
                                                bar0_start + pos, buf, count);
                } else {
                        uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

                        ret = intel_gvt_ops->emulate_mmio_read(vgpu,
                                                bar0_start + pos, buf, count);
                }
                break;
        case VFIO_PCI_BAR2_REGION_INDEX:
        case VFIO_PCI_BAR3_REGION_INDEX:
        case VFIO_PCI_BAR4_REGION_INDEX:
        case VFIO_PCI_BAR5_REGION_INDEX:
        case VFIO_PCI_VGA_REGION_INDEX:
        case VFIO_PCI_ROM_REGION_INDEX:
        default:
                gvt_vgpu_err("unsupported region: %u\n", index);
        }

        return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
                        size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 2;
                } else {
                        u8 val;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
                                        false);
                        if (ret <= 0)
                                goto read_err;

                        if (copy_to_user(buf, &val, sizeof(val)))
                                goto read_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;

read_err:
        return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
                                const char __user *buf,
                                size_t count, loff_t *ppos)
{
        unsigned int done = 0;
        int ret;

        while (count) {
                size_t filled;

                if (count >= 4 && !(*ppos % 4)) {
                        u32 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 4;
                } else if (count >= 2 && !(*ppos % 2)) {
                        u16 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, (char *)&val,
                                        sizeof(val), ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 2;
                } else {
                        u8 val;

                        if (copy_from_user(&val, buf, sizeof(val)))
                                goto write_err;

                        ret = intel_vgpu_rw(mdev, &val, sizeof(val),
                                        ppos, true);
                        if (ret <= 0)
                                goto write_err;

                        filled = 1;
                }

                count -= filled;
                done += filled;
                *ppos += filled;
                buf += filled;
        }

        return done;
write_err:
        return -EFAULT;
}

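/*
 * mmap handler: only BAR2 (the graphics memory aperture) may be mapped.
 * The vma is backed directly by the vGPU's slice of the physical
 * aperture via remap_pfn_range().
 */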
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
        unsigned int index;
        u64 virtaddr;
        unsigned long req_size, pgoff = 0;
        pgprot_t pg_prot;
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

        index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
        if (index >= VFIO_PCI_ROM_REGION_INDEX)
                return -EINVAL;

        if (vma->vm_end < vma->vm_start)
                return -EINVAL;
        if ((vma->vm_flags & VM_SHARED) == 0)
                return -EINVAL;
        if (index != VFIO_PCI_BAR2_REGION_INDEX)
                return -EINVAL;

        pg_prot = vma->vm_page_prot;
        virtaddr = vma->vm_start;
        req_size = vma->vm_end - vma->vm_start;
        pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

        return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
        if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
                return 1;

        return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags,
                        void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
                        unsigned int index, unsigned int start,
                        unsigned int count, uint32_t flags, void *data)
{
        return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
                unsigned int index, unsigned int start, unsigned int count,
                uint32_t flags, void *data)
{
        struct eventfd_ctx *trigger;

        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int fd = *(int *)data;

                trigger = eventfd_ctx_fdget(fd);
                if (IS_ERR(trigger)) {
                        gvt_vgpu_err("eventfd_ctx_fdget failed\n");
                        return PTR_ERR(trigger);
                }
                vgpu->vdev.msi_trigger = trigger;
        }

        return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
                unsigned int index, unsigned int start, unsigned int count,
                void *data)
{
        int (*func)(struct intel_vgpu *vgpu, unsigned int index,
                        unsigned int start, unsigned int count, uint32_t flags,
                        void *data) = NULL;

        switch (index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                        func = intel_vgpu_set_intx_mask;
                        break;
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        func = intel_vgpu_set_intx_unmask;
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_intx_trigger;
                        break;
                }
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        /* XXX Need masking support exported */
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = intel_vgpu_set_msi_trigger;
                        break;
                }
                break;
        }

        if (!func)
                return -ENOTTY;

        return func(vgpu, index, start, count, flags, data);
}

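/*
 * VFIO device ioctl handler: implements VFIO_DEVICE_GET_INFO,
 * VFIO_DEVICE_GET_REGION_INFO (including the sparse-mmap capability for
 * the BAR2 aperture), VFIO_DEVICE_GET_IRQ_INFO, VFIO_DEVICE_SET_IRQS and
 * VFIO_DEVICE_RESET.
 */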
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                             unsigned long arg)
{
        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
        unsigned long minsz;

        gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

        if (cmd == VFIO_DEVICE_GET_INFO) {
                struct vfio_device_info info;

                minsz = offsetofend(struct vfio_device_info, num_irqs);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.flags = VFIO_DEVICE_FLAGS_PCI;
                info.flags |= VFIO_DEVICE_FLAGS_RESET;
                info.num_regions = VFIO_PCI_NUM_REGIONS;
                info.num_irqs = VFIO_PCI_NUM_IRQS;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;

        } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
                struct vfio_region_info info;
                struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
                int i, ret;
                struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
                size_t size;
                int nr_areas = 1;
                int cap_type_id;

                minsz = offsetofend(struct vfio_region_info, offset);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_CONFIG_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR0_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = vgpu->cfg_space.bar[info.index].size;
                        if (!info.size) {
                                info.flags = 0;
                                break;
                        }

                        info.flags = VFIO_REGION_INFO_FLAG_READ |
                                     VFIO_REGION_INFO_FLAG_WRITE;
                        break;
                case VFIO_PCI_BAR1_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;
                        info.flags = 0;
                        break;
                case VFIO_PCI_BAR2_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.flags = VFIO_REGION_INFO_FLAG_CAPS |
                                        VFIO_REGION_INFO_FLAG_MMAP |
                                        VFIO_REGION_INFO_FLAG_READ |
                                        VFIO_REGION_INFO_FLAG_WRITE;
                        info.size = gvt_aperture_sz(vgpu->gvt);

                        size = sizeof(*sparse) +
                                        (nr_areas * sizeof(*sparse->areas));
                        sparse = kzalloc(size, GFP_KERNEL);
                        if (!sparse)
                                return -ENOMEM;

                        sparse->nr_areas = nr_areas;
                        cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
                        sparse->areas[0].offset =
                                        PAGE_ALIGN(vgpu_aperture_offset(vgpu));
                        sparse->areas[0].size = vgpu_aperture_sz(vgpu);
                        break;

                case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
                        info.size = 0;
                        info.flags = 0;

                        gvt_dbg_core("get region info bar:%d\n", info.index);
                        break;

                case VFIO_PCI_ROM_REGION_INDEX:
                case VFIO_PCI_VGA_REGION_INDEX:
                        gvt_dbg_core("get region info index:%d\n", info.index);
                        break;
                default:
                        {
                                struct vfio_region_info_cap_type cap_type;

                                if (info.index >= VFIO_PCI_NUM_REGIONS +
                                                vgpu->vdev.num_regions)
                                        return -EINVAL;

                                i = info.index - VFIO_PCI_NUM_REGIONS;

                                info.offset =
                                        VFIO_PCI_INDEX_TO_OFFSET(info.index);
                                info.size = vgpu->vdev.region[i].size;
                                info.flags = vgpu->vdev.region[i].flags;

                                cap_type.type = vgpu->vdev.region[i].type;
                                cap_type.subtype = vgpu->vdev.region[i].subtype;

                                ret = vfio_info_add_capability(&caps,
                                                VFIO_REGION_INFO_CAP_TYPE,
                                                &cap_type);
                                if (ret)
                                        return ret;
                        }
                }

                if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
                        switch (cap_type_id) {
                        case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
                                ret = vfio_info_add_capability(&caps,
                                        VFIO_REGION_INFO_CAP_SPARSE_MMAP,
                                        sparse);
                                kfree(sparse);
                                if (ret)
                                        return ret;
                                break;
                        default:
                                return -EINVAL;
                        }
                }

                if (caps.size) {
                        if (info.argsz < sizeof(info) + caps.size) {
                                info.argsz = sizeof(info) + caps.size;
                                info.cap_offset = 0;
                        } else {
                                vfio_info_cap_shift(&caps, sizeof(info));
                                if (copy_to_user((void __user *)arg +
                                                  sizeof(info), caps.buf,
                                                  caps.size)) {
                                        kfree(caps.buf);
                                        return -EFAULT;
                                }
                                info.cap_offset = sizeof(info);
                        }

                        kfree(caps.buf);
                }

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
                struct vfio_irq_info info;

                minsz = offsetofend(struct vfio_irq_info, count);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
                        return -EINVAL;

                switch (info.index) {
                case VFIO_PCI_INTX_IRQ_INDEX:
                case VFIO_PCI_MSI_IRQ_INDEX:
                        break;
                default:
                        return -EINVAL;
                }

                info.flags = VFIO_IRQ_INFO_EVENTFD;

                info.count = intel_vgpu_get_irq_count(vgpu, info.index);

                if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
                        info.flags |= (VFIO_IRQ_INFO_MASKABLE |
                                       VFIO_IRQ_INFO_AUTOMASKED);
                else
                        info.flags |= VFIO_IRQ_INFO_NORESIZE;

                return copy_to_user((void __user *)arg, &info, minsz) ?
                        -EFAULT : 0;
        } else if (cmd == VFIO_DEVICE_SET_IRQS) {
                struct vfio_irq_set hdr;
                u8 *data = NULL;
                int ret = 0;
                size_t data_size = 0;

                minsz = offsetofend(struct vfio_irq_set, count);

                if (copy_from_user(&hdr, (void __user *)arg, minsz))
                        return -EFAULT;

                if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
                        int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

                        ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
                                                VFIO_PCI_NUM_IRQS, &data_size);
                        if (ret) {
                                gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
                                return -EINVAL;
                        }
                        if (data_size) {
                                data = memdup_user((void __user *)(arg + minsz),
                                                   data_size);
                                if (IS_ERR(data))
                                        return PTR_ERR(data);
                        }
                }

                ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
                                        hdr.start, hdr.count, data);
                kfree(data);

                return ret;
        } else if (cmd == VFIO_DEVICE_RESET) {
                intel_gvt_ops->vgpu_reset(vgpu);
                return 0;
        }

        return 0;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
             char *buf)
{
        struct mdev_device *mdev = mdev_from_dev(dev);

        if (mdev) {
                struct intel_vgpu *vgpu = (struct intel_vgpu *)
                        mdev_get_drvdata(mdev);
                return sprintf(buf, "%d\n", vgpu->id);
        }
        return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
        &dev_attr_vgpu_id.attr,
        NULL
};

static const struct attribute_group intel_vgpu_group = {
        .name = "intel_vgpu",
        .attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
        &intel_vgpu_group,
        NULL,
};

static const struct mdev_parent_ops intel_vgpu_ops = {
        .supported_type_groups = intel_vgpu_type_groups,
        .mdev_attr_groups = intel_vgpu_groups,
        .create = intel_vgpu_create,
        .remove = intel_vgpu_remove,
        .open = intel_vgpu_open,
        .release = intel_vgpu_release,
        .read = intel_vgpu_read,
        .write = intel_vgpu_write,
        .mmap = intel_vgpu_mmap,
        .ioctl = intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
        if (!intel_gvt_init_vgpu_type_groups(gvt))
                return -EFAULT;

        intel_gvt_ops = ops;

        return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
        intel_gvt_cleanup_vgpu_type_groups(gvt);
        mdev_unregister_device(dev);
}

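/*
 * Ask KVM to write-protect the page backing @gfn via the page-track
 * framework and remember the gfn in the protect table, so that guest
 * writes to it are forwarded to kvmgt_page_track_write() for emulation.
 */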
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);
        if (!slot) {
                srcu_read_unlock(&kvm->srcu, idx);
                return -EINVAL;
        }

        spin_lock(&kvm->mmu_lock);

        if (kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_add(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        struct kvm_memory_slot *slot;
        int idx;

        if (!handle_valid(handle))
                return 0;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        idx = srcu_read_lock(&kvm->srcu);
        slot = gfn_to_memslot(kvm, gfn);
        if (!slot) {
                srcu_read_unlock(&kvm->srcu, idx);
                return -EINVAL;
        }

        spin_lock(&kvm->mmu_lock);

        if (!kvmgt_gfn_is_write_protected(info, gfn))
                goto out;

        kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
        kvmgt_protect_table_del(info, gfn);

out:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
        return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                const u8 *val, int len,
                struct kvm_page_track_notifier_node *node)
{
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
                intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
                                        (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
                struct kvm_memory_slot *slot,
                struct kvm_page_track_notifier_node *node)
{
        int i;
        gfn_t gfn;
        struct kvmgt_guest_info *info = container_of(node,
                                        struct kvmgt_guest_info, track_node);

        spin_lock(&kvm->mmu_lock);
        for (i = 0; i < slot->npages; i++) {
                gfn = slot->base_gfn + i;
                if (kvmgt_gfn_is_write_protected(info, gfn)) {
                        kvm_slot_page_track_remove_page(kvm, slot, gfn,
                                                KVM_PAGE_TRACK_WRITE);
                        kvmgt_protect_table_del(info, gfn);
                }
        }
        spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
        struct intel_vgpu *itr;
        struct kvmgt_guest_info *info;
        int id;
        bool ret = false;

        mutex_lock(&vgpu->gvt->lock);
        for_each_active_vgpu(vgpu->gvt, itr, id) {
                if (!handle_valid(itr->handle))
                        continue;

                info = (struct kvmgt_guest_info *)itr->handle;
                if (kvm && kvm == info->kvm) {
                        ret = true;
                        goto out;
                }
        }
out:
        mutex_unlock(&vgpu->gvt->lock);
        return ret;
}

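/*
 * Bind a vGPU to the KVM instance delivered by the VFIO group notifier:
 * allocate the kvmgt_guest_info, take a reference on the kvm, initialize
 * the protect table and DMA cache, and register the page-track notifier.
 * The vGPU handle is the address of the kvmgt_guest_info.
 */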
static int kvmgt_guest_init(struct mdev_device *mdev)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;
        struct kvm *kvm;

        vgpu = mdev_get_drvdata(mdev);
        if (handle_valid(vgpu->handle))
                return -EEXIST;

        kvm = vgpu->vdev.kvm;
        if (!kvm || kvm->mm != current->mm) {
                gvt_vgpu_err("KVM is required to use Intel vGPU\n");
                return -ESRCH;
        }

        if (__kvmgt_vgpu_exist(vgpu, kvm))
                return -EEXIST;

        info = vzalloc(sizeof(struct kvmgt_guest_info));
        if (!info)
                return -ENOMEM;

        vgpu->handle = (unsigned long)info;
        info->vgpu = vgpu;
        info->kvm = kvm;
        kvm_get_kvm(info->kvm);

        kvmgt_protect_table_init(info);
        gvt_cache_init(vgpu);

        info->track_node.track_write = kvmgt_page_track_write;
        info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
        kvm_page_track_register_notifier(kvm, &info->track_node);

        return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
        kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
        kvm_put_kvm(info->kvm);
        kvmgt_protect_table_destroy(info);
        gvt_cache_destroy(info->vgpu);
        vfree(info);

        return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
        /* nothing to do here */
        return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
        /* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
        struct kvmgt_guest_info *info;
        struct intel_vgpu *vgpu;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        vgpu = info->vgpu;

        if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
                return 0;

        return -EFAULT;
}

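/*
 * Translate a guest frame number to a host IOVA the GPU can use: return
 * a cached translation if one exists, otherwise pin the guest page via
 * vfio_pin_pages(), DMA-map it, and cache the result.
 */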
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
        unsigned long iova, pfn;
        struct kvmgt_guest_info *info;
        struct device *dev;
        struct intel_vgpu *vgpu;
        int rc;

        if (!handle_valid(handle))
                return INTEL_GVT_INVALID_ADDR;

        info = (struct kvmgt_guest_info *)handle;
        vgpu = info->vgpu;
        iova = gvt_cache_find(info->vgpu, gfn);
        if (iova != INTEL_GVT_INVALID_ADDR)
                return iova;

        pfn = INTEL_GVT_INVALID_ADDR;
        dev = mdev_dev(info->vgpu->vdev.mdev);
        rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
        if (rc != 1) {
                gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
                        gfn, rc);
                return INTEL_GVT_INVALID_ADDR;
        }
        /* transfer to host iova for GFX to use DMA */
        rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
        if (rc) {
                gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
                vfio_unpin_pages(dev, &gfn, 1);
                return INTEL_GVT_INVALID_ADDR;
        }

        gvt_cache_add(info->vgpu, gfn, iova);
        return iova;
}

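/*
 * Read or write guest physical memory through the KVM memslots. When
 * called from a kernel thread (no current->mm), temporarily adopt the
 * guest's mm with use_mm() so the access can be performed.
 */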
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len, bool write)
{
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
        int idx, ret;
        bool kthread = current->mm == NULL;

        if (!handle_valid(handle))
                return -ESRCH;

        info = (struct kvmgt_guest_info *)handle;
        kvm = info->kvm;

        if (kthread)
                use_mm(kvm->mm);

        idx = srcu_read_lock(&kvm->srcu);
        ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
                      kvm_read_guest(kvm, gpa, buf, len);
        srcu_read_unlock(&kvm->srcu, idx);

        if (kthread)
                unuse_mm(kvm->mm);

        return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
                        void *buf, unsigned long len)
{
        return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
        return PFN_DOWN(__pa(addr));
}

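/*
 * The mediated pass-through (MPT) operations exported to the GVT-g core;
 * registering this table makes KVM the hypervisor backend for GVT-g.
 */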
struct intel_gvt_mpt kvmgt_mpt = {
        .host_init = kvmgt_host_init,
        .host_exit = kvmgt_host_exit,
        .attach_vgpu = kvmgt_attach_vgpu,
        .detach_vgpu = kvmgt_detach_vgpu,
        .inject_msi = kvmgt_inject_msi,
        .from_virt_to_mfn = kvmgt_virt_to_pfn,
        .set_wp_page = kvmgt_write_protect_add,
        .unset_wp_page = kvmgt_write_protect_remove,
        .read_gpa = kvmgt_read_gpa,
        .write_gpa = kvmgt_write_gpa,
        .gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
        return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");