/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
	u32 type;
	u32 subtype;
	size_t size;
	u32 flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	kvm_pfn_t pfn;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
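
/*
 * gfn-to-pfn cache: each guest page pinned through VFIO is remembered in a
 * per-vGPU rbtree (struct gvt_dma, keyed by gfn) so repeated translations do
 * not pin the same page twice. Entries are dropped on DMA unmap
 * notifications and when the vGPU is torn down.
 */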
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}

static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	kvm_pfn_t pfn;

	mutex_lock(&vgpu->vdev.cache_lock);

	entry = __gvt_cache_find(vgpu, gfn);
	pfn = (entry == NULL) ? 0 : entry->pfn;

	mutex_unlock(&vgpu->vdev.cache_lock);
	return pfn;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->pfn = pfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long g1;
	int rc;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
	}

	g1 = gfn;
	rc = vfio_unpin_pages(dev, &g1, 1);
	WARN_ON(rc != 1);
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while ((node = rb_first(&vgpu->vdev.cache))) {
		dma = rb_entry(node, struct gvt_dma, node);
		gfn = dma->gfn;

		vfio_unpin_pages(dev, &gfn, 1);
		__gvt_cache_remove_entry(vgpu, dma);
	}
	mutex_unlock(&vgpu->vdev.cache_lock);
}
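
/*
 * mdev type attributes exposed under the parent device's
 * mdev_supported_types/<type>/ sysfs directory. The vGPU type is looked up
 * by stripping the parent driver name prefix from the kobject name.
 */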
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static ssize_t available_instance_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence);
}

static MDEV_TYPE_ATTR_RO(available_instance);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instance.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}
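
/*
 * Hash table of gfns currently write-protected for this guest. It lets the
 * KVM page-track callbacks further down filter notifications to the pages
 * that GVT-g actually asked to track.
 */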
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}
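
/*
 * mdev create/remove callbacks: "create" resolves the requested vGPU type
 * from the mdev kobject name and asks the GVT-g core to instantiate a vGPU;
 * "remove" refuses to destroy a vGPU whose handle is still live.
 */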
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}
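
/*
 * Device open/release path. Opening registers VFIO IOMMU and group
 * notifiers (to invalidate pinned pages on DMA unmap and to learn which KVM
 * instance is bound to the group), then initializes the guest state.
 * Release undoes this; it is reached either through the mdev release
 * callback or, via a workqueue, when the group drops its KVM reference.
 */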
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			gvt_cache_remove(vgpu, gfn++);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_err("vfio_register_notifier for iommu failed: %d\n", ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_err("vfio_register_notifier for group failed: %d\n", ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}
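
/*
 * Region read/write emulation. The VFIO offset encodes the region index in
 * its upper bits (VFIO_PCI_OFFSET_SHIFT); config space and BAR0 MMIO
 * accesses are forwarded to the GVT-g emulation hooks, everything else is
 * rejected. The read/write entry points below split user buffers into
 * naturally aligned 4/2/1-byte accesses.
 */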
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
	u32 start_lo, start_hi;
	u32 mem_type;
	int pos = PCI_BASE_ADDRESS_0;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ pos + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
	case VFIO_PCI_BAR1_REGION_INDEX:
		if (is_write) {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_write(vgpu,
						bar0_start + pos, buf, count);
		} else {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_read(vgpu,
						bar0_start + pos, buf, count);
		}
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}
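
/*
 * mmap is only allowed for the BAR2 (aperture) region and maps the vGPU's
 * slice of the physical aperture directly into the VMA with
 * remap_pfn_range().
 */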
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff = 0;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}
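
/*
 * Interrupt configuration. One vector is reported for INTx and MSI; only
 * the MSI trigger action is actually wired up, storing an eventfd that is
 * later signalled by kvmgt_inject_msi().
 */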
static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	}

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}
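
/*
 * VFIO device ioctls: DEVICE_GET_INFO, GET_REGION_INFO (including a sparse
 * mmap capability describing the BAR2 aperture), GET_IRQ_INFO, SET_IRQS and
 * RESET.
 */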
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			if (!caps.buf) {
				kfree(caps.buf);
				caps.buf = NULL;
				caps.size = 0;
			}
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type;

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
						VFIO_REGION_INFO_CAP_TYPE,
						&cap_type);
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
					sparse);
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return 0;
}

static const struct mdev_parent_ops intel_vgpu_ops = {
	.supported_type_groups	= intel_vgpu_type_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open			= intel_vgpu_open,
	.release		= intel_vgpu_release,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};
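
/*
 * Host-side hooks: register/unregister this file's mdev_parent_ops with the
 * mdev core and store the GVT-g ops table used by the emulation paths above.
 */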
static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	mdev_unregister_device(dev);
}
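
/*
 * Write protection via the KVM page-track framework: add/remove a gfn from
 * write tracking under kvm->mmu_lock and mirror the state in the local
 * protect table, so the track_write/track_flush_slot callbacks know which
 * writes to forward to MMIO emulation.
 */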
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
					(void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}
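
/*
 * Read the CPUID hypervisor signature leaf to tell whether we are running
 * inside a KVM guest; kvmgt_detect_host() fails in that case, since KVMGT
 * has to run on the native host.
 */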
static bool kvmgt_check_guest(void)
{
	unsigned int eax, ebx, ecx, edx;
	char s[12];
	unsigned int *i;

	eax = KVM_CPUID_SIGNATURE;
	ebx = ecx = edx = 0;

	asm volatile ("cpuid"
		      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
		      :
		      : "cc", "memory");
	i = (unsigned int *)s;
	i[0] = ebx;
	i[1] = ecx;
	i[2] = edx;

	return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
}

/**
 * NOTE:
 * It's actually impossible to check if we are running in KVM host,
 * since the "KVM host" is simply native. So we only detect guest here.
 */
static int kvmgt_detect_host(void)
{
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped) {
		gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
		return -ENODEV;
	}
#endif
	return kvmgt_check_guest() ? -ENODEV : 0;
}
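
/*
 * Bind a KVM instance to a vGPU: reject binding the same VM twice, allocate
 * the kvmgt_guest_info handle, and register the page-track notifier.
 * kvmgt_guest_exit() tears all of this down again.
 */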
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	if (!info) {
		gvt_err("kvmgt_guest_info invalid\n");
		return false;
	}

	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}
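
/*
 * Translate a guest frame number to a host pfn: hit the local cache first,
 * otherwise pin the page through vfio_pin_pages() and remember the result.
 */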
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;
	pfn = gvt_cache_find(info->vgpu, gfn);
	if (pfn != 0)
		return pfn;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, pfn);
	return pfn;
}
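
/*
 * Guest physical memory access on behalf of device emulation. When called
 * from a kernel thread (no current->mm), temporarily adopt the VM's mm with
 * use_mm() so kvm_read_guest()/kvm_write_guest() can reach the guest memory
 * through its userspace mapping.
 */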
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}
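
/*
 * The MPT (mediated pass-through) operations exported to the GVT-g core;
 * this table selects the KVM/VFIO backend implemented in this file.
 */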
struct intel_gvt_mpt kvmgt_mpt = {
	.detect_host = kvmgt_detect_host,
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");