kvmgt.c

/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT 40
#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
	u32 type;
	u32 subtype;
	size_t size;
	u32 flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	unsigned long iova;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
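
/*
 * DMA mapping helpers: gvt_dma_map_iova() maps a pinned guest page (given by
 * host pfn) through the parent PCI device so the GPU can DMA to it, and
 * returns the resulting IOVA as a page frame number; gvt_dma_unmap_iova()
 * undoes that mapping. Callers pin the page through VFIO beforehand.
 */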
static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
		unsigned long *iova)
{
	struct page *page;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (unlikely(!pfn_valid(pfn)))
		return -EFAULT;

	page = pfn_to_page(pfn);
	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
			PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	*iova = (unsigned long)(daddr >> PAGE_SHIFT);
	return 0;
}

static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	daddr = (dma_addr_t)(iova << PAGE_SHIFT);
	dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}
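
/*
 * gfn -> iova cache: an rb-tree of struct gvt_dma entries keyed by guest
 * frame number and protected by vdev.cache_lock. It records which guest
 * pages have already been pinned and DMA-mapped so repeated translations do
 * not pin the same page twice; the removal paths also unpin the page and
 * tear down the DMA mapping.
 */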
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}

static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	unsigned long iova;

	mutex_lock(&vgpu->vdev.cache_lock);
	entry = __gvt_cache_find(vgpu, gfn);
	iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
	mutex_unlock(&vgpu->vdev.cache_lock);

	return iova;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		unsigned long iova)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->iova = iova;

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long g1;
	int rc;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
	}

	g1 = gfn;
	gvt_dma_unmap_iova(vgpu, this->iova);
	rc = vfio_unpin_pages(dev, &g1, 1);
	WARN_ON(rc != 1);
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, node);
		gvt_dma_unmap_iova(vgpu, dma->iova);
		gfn = dma->gfn;
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
		vfio_unpin_pages(dev, &gfn, 1);
	}
}
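
/*
 * Look up a vGPU type by its mdev type name. The sysfs kobject name has the
 * form "<parent driver name>-<type name>", so the parent driver prefix and
 * the separating '-' are skipped before comparing against gvt->types[].
 */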
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static ssize_t available_instances_show(struct kobject *kobj,
					struct device *dev, char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}
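
/*
 * Write-protect bookkeeping: a hash table of gfns (struct kvmgt_pgfn) that
 * this guest currently has write-protected through the KVM page-track
 * framework, so the page-track callbacks can quickly tell whether a written
 * gpa falls in a tracked page.
 */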
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}
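
/*
 * mdev "create" callback: resolve the requested type from the sysfs kobject
 * name, ask the GVT core to create a vGPU of that type, and attach the vGPU
 * to the mdev device as driver data. The guest itself is only hooked up
 * later, in intel_vgpu_open().
 */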
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}
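
/*
 * VFIO notifier callbacks. The IOMMU notifier invalidates the gfn -> iova
 * cache for any range the user unmaps; the group notifier captures the KVM
 * instance associated with the VFIO group and schedules a release when that
 * association is dropped.
 */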
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			gvt_cache_remove(vgpu, gfn++);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}
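
/*
 * mdev "open" callback: register the IOMMU and group notifiers, then bind
 * the vGPU to the guest's KVM instance via kvmgt_guest_init() and activate
 * it. Any failure unwinds the notifier registrations in reverse order.
 */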
static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}
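
/*
 * Common release path, reached either from the mdev "release" callback or
 * from the deferred work scheduled by the group notifier. The atomic
 * 'released' flag ensures the teardown only runs once per open.
 */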
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}
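
/*
 * Read a BAR base address from the vGPU's virtual PCI configuration space.
 * 64-bit memory BARs pull the upper dword from the following register;
 * everything else is treated as a 32-bit BAR.
 */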
static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off,
			     void *buf, unsigned int count, bool is_write)
{
	uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}
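
/*
 * Core read/write dispatcher. The VFIO region index is encoded in the upper
 * bits of the file offset (see the VFIO_PCI_OFFSET_* macros above); config
 * space and BAR accesses are forwarded to the GVT emulation hooks. The
 * read()/write() callbacks below split user buffers into naturally aligned
 * 4/2/1-byte accesses before calling this helper.
 */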
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_2, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_vgpu_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}
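
/*
 * mdev "mmap" callback: only the BAR2 (aperture) region may be mapped, and
 * only with VM_SHARED. The vma is backed directly by the vGPU's slice of
 * the physical aperture via remap_pfn_range().
 */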
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff = 0;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	}

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}
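
/*
 * mdev "ioctl" callback implementing the VFIO device ioctls: device, region
 * and IRQ info queries (including the sparse-mmap capability for BAR2),
 * VFIO_DEVICE_SET_IRQS for wiring up the MSI eventfd, and VFIO_DEVICE_RESET.
 */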
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header, sizeof(*sparse) +
					(sparse->nr_areas *
						sizeof(*sparse->areas)));
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return 0;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static ssize_t
hw_id_show(struct device *dev, struct device_attribute *attr,
	   char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%u\n",
			       vgpu->shadow_ctx->hw_id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);
static DEVICE_ATTR_RO(hw_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	&dev_attr_hw_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static const struct mdev_parent_ops intel_vgpu_ops = {
	.supported_type_groups = intel_vgpu_type_groups,
	.mdev_attr_groups = intel_vgpu_groups,
	.create = intel_vgpu_create,
	.remove = intel_vgpu_remove,

	.open = intel_vgpu_open,
	.release = intel_vgpu_release,

	.read = intel_vgpu_read,
	.write = intel_vgpu_write,
	.mmap = intel_vgpu_mmap,
	.ioctl = intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	mdev_unregister_device(dev);
}
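
/*
 * MPT hooks for write protection: add or remove a gfn from KVM's page-track
 * write tracking and mirror the state in the local protect table. Both take
 * kvm->srcu to look up the memslot and kvm->mmu_lock while updating the
 * tracking state.
 */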
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
					(void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}
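
/*
 * Bind a vGPU to the opening guest: the KVM reference delivered by the group
 * notifier is validated against the current process, a kvmgt_guest_info is
 * allocated and stored in vgpu->handle, and the page-track notifier is
 * registered so guest writes to protected pages reach the MMIO emulation
 * path. kvmgt_guest_exit() reverses all of this.
 */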
static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}
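
/*
 * Translate a guest frame number to an address the GPU can DMA to. A cache
 * hit returns the previously mapped iova; otherwise the page is pinned
 * through VFIO, DMA-mapped through the parent device, and the result is
 * inserted into the gfn -> iova cache.
 */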
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long iova, pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	struct intel_vgpu *vgpu;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;
	iova = gvt_cache_find(info->vgpu, gfn);
	if (iova != INTEL_GVT_INVALID_ADDR)
		return iova;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}
	/* transfer to host iova for GFX to use DMA */
	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
	if (rc) {
		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
		vfio_unpin_pages(dev, &gfn, 1);
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, iova);
	return iova;
}
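
/*
 * Read or write guest physical memory through KVM. When called from a
 * kernel thread (no current->mm), the guest's mm is temporarily adopted
 * with use_mm() so kvm_read_guest()/kvm_write_guest() can access the user
 * address space backing guest memory.
 */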
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}
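
/*
 * The mediated pass-through (MPT) operations table handed to the GVT core;
 * on this backend the "mfn" hooks are backed by host pfn/iova helpers.
 */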
struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");