kvmgt.c

/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
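
/*
 * Example of the offset encoding above: a VFIO access to region index 2
 * (BAR2) at byte 0x1000 arrives with *ppos = (2ULL << 40) | 0x1000; the
 * region index is recovered with VFIO_PCI_OFFSET_TO_INDEX() and the
 * in-region offset with (*ppos & VFIO_PCI_OFFSET_MASK).
 */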
#define OPREGION_SIGNATURE "IntelGraphicsMem"

struct vfio_region;
struct intel_vgpu_regops {
	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool iswrite);
	void (*release)(struct intel_vgpu *vgpu,
			struct vfio_region *region);
};

struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
	const struct intel_vgpu_regops	*ops;
	void				*data;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
	struct dentry *debugfs_cache_entries;
};

struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;
	struct rb_node dma_addr_node;
	gfn_t gfn;
	dma_addr_t dma_addr;
	struct kref ref;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
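
/*
 * Pin a guest page through VFIO and set up a DMA mapping for it.  On
 * success, *dma_addr holds the bus address the device can use for this gfn.
 */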
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t *dma_addr)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	struct page *page;
	unsigned long pfn;
	int ret;

	/* Pin the page first. */
	ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &pfn);
	if (ret != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			     gfn, ret);
		return -EINVAL;
	}

	if (!pfn_valid(pfn)) {
		gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
		vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
		return -EINVAL;
	}

	/* Setup DMA mapping. */
	page = pfn_to_page(pfn);
	*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
				 PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr)) {
		gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
		vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
		return -ENOMEM;
	}

	return 0;
}

static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t dma_addr)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	int ret;

	dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
	WARN_ON(ret != 1);
}
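
/*
 * The vGPU keeps two rb-trees over the same struct gvt_dma entries, so a
 * cached mapping can be looked up either by guest frame number or by the
 * DMA address it was mapped to.
 */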
static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			node = node->rb_left;
		else if (dma_addr > itr->dma_addr)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}
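
/*
 * Allocate a new cache entry and link it into both rb-trees.  Its only
 * caller, kvmgt_dma_map_guest_page(), holds vdev.cache_lock around it.
 */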
static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		dma_addr_t dma_addr)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->vgpu = vgpu;
	new->gfn = gfn;
	new->dma_addr = dma_addr;
	kref_init(&new->ref);

	/* gfn_cache maps gfn to struct gvt_dma. */
	link = &vgpu->vdev.gfn_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->gfn_node, parent, link);
	rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache);

	/* dma_addr_cache maps dma addr to struct gvt_dma. */
	parent = NULL;
	link = &vgpu->vdev.dma_addr_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->dma_addr_node, parent, link);
	rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);

	vgpu->vdev.nr_cache_entries++;
	return 0;
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
		struct gvt_dma *entry)
{
	rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache);
	rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache);
	kfree(entry);
	vgpu->vdev.nr_cache_entries--;
}
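
/*
 * Tear down every cached mapping for this vGPU: unpin/unmap each page and
 * free its entry, taking and dropping cache_lock once per entry.
 */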
static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.gfn_cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, gfn_node);
		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr);
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
	}
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.gfn_cache = RB_ROOT;
	vgpu->vdev.dma_addr_cache = RB_ROOT;
	vgpu->vdev.nr_cache_entries = 0;
	mutex_init(&vgpu->vdev.cache_lock);
}
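
/*
 * The protect table is a hash table, keyed by gfn, of the guest pages this
 * vGPU currently has write-protected through KVM page tracking.
 */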
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}
	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}
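
/*
 * Read-only handler for the Intel IGD OpRegion device region: writes are
 * rejected and reads are served straight from the vGPU's OpRegion copy.
 */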
static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	void *base = vgpu->vdev.region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos >= vgpu->vdev.region[i].size || iswrite) {
		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
		return -EINVAL;
	}
	count = min(count, (size_t)(vgpu->vdev.region[i].size - pos));
	memcpy(buf, base + pos, count);

	return count;
}

static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
}

static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
	.rw = intel_vgpu_reg_rw_opregion,
	.release = intel_vgpu_reg_release_opregion,
};

static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
		unsigned int type, unsigned int subtype,
		const struct intel_vgpu_regops *ops,
		size_t size, u32 flags, void *data)
{
	struct vfio_region *region;

	region = krealloc(vgpu->vdev.region,
			(vgpu->vdev.num_regions + 1) * sizeof(*region),
			GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vgpu->vdev.region = region;
	vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
	vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
	vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
	vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
	vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
	vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
	vgpu->vdev.num_regions++;
	return 0;
}

static int kvmgt_get_vfio_device(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;

	vgpu->vdev.vfio_device = vfio_device_get_from_dev(
		mdev_dev(vgpu->vdev.mdev));
	if (!vgpu->vdev.vfio_device) {
		gvt_vgpu_err("failed to get vfio device\n");
		return -ENODEV;
	}
	return 0;
}

static int kvmgt_set_opregion(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	void *base;
	int ret;

	/* Each vgpu has its own opregion, although VFIO would create another
	 * one later. This one is used to expose the opregion to VFIO, while
	 * the one created by VFIO later is what the guest actually uses.
	 */
	base = vgpu_opregion(vgpu)->va;
	if (!base)
		return -ENOMEM;

	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
		return -EINVAL;
	}

	ret = intel_vgpu_register_reg(vgpu,
			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
			&intel_vgpu_regops_opregion, OPREGION_SIZE,
			VFIO_REGION_INFO_FLAG_READ, base);

	return ret;
}

static void kvmgt_put_vfio_device(void *vgpu)
{
	if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device))
		return;

	vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device);
}
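
/*
 * mdev "create" callback: look up the requested vGPU type by the kobject
 * (sysfs directory) name and instantiate a vGPU of that type.
 */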
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}
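
/*
 * VFIO IOMMU notifier: when the IOVA range of a DMA unmap overlaps cached
 * guest pages, unpin them and drop the corresponding cache entries.
 */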
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		struct gvt_dma *entry;
		unsigned long iov_pfn, end_iov_pfn;

		iov_pfn = unmap->iova >> PAGE_SHIFT;
		end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;

		mutex_lock(&vgpu->vdev.cache_lock);
		for (; iov_pfn < end_iov_pfn; iov_pfn++) {
			entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
			if (!entry)
				continue;

			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
			__gvt_cache_remove_entry(vgpu, entry);
		}
		mutex_unlock(&vgpu->vdev.cache_lock);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}
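
/*
 * mdev "open" callback: register the IOMMU and group notifiers, bind the
 * vGPU to its KVM guest via kvmgt_guest_init(), and activate it.  The
 * notifiers are unwound on any failure.
 */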
static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}

static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
{
	struct eventfd_ctx *trigger;

	trigger = vgpu->vdev.msi_trigger;
	if (trigger) {
		eventfd_ctx_put(trigger);
		vgpu->vdev.msi_trigger = NULL;
	}
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	intel_vgpu_release_msi_eventfd_ctx(vgpu);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}
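
/*
 * Decode a BAR base address from the vGPU's virtual PCI config space,
 * picking up the high dword as well when the BAR is a 64-bit memory BAR.
 */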
static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off,
			     void *buf, unsigned int count, bool is_write)
{
	uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}

static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, uint64_t off)
{
	return off >= vgpu_aperture_offset(vgpu) &&
	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
}

static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t off,
		void *buf, unsigned long count, bool is_write)
{
	void *aperture_va;

	if (!intel_vgpu_in_aperture(vgpu, off) ||
	    !intel_vgpu_in_aperture(vgpu, off + count)) {
		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
		return -EINVAL;
	}

	aperture_va = io_mapping_map_wc(&vgpu->gvt->dev_priv->ggtt.iomap,
					ALIGN_DOWN(off, PAGE_SIZE),
					count + offset_in_page(off));
	if (!aperture_va)
		return -EIO;

	if (is_write)
		memcpy(aperture_va + offset_in_page(off), buf, count);
	else
		memcpy(buf, aperture_va + offset_in_page(off), count);

	io_mapping_unmap(aperture_va);

	return 0;
}
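
/*
 * Common read/write path: dispatch an access by VFIO region index to the
 * config-space emulator, BAR0 MMIO emulation, the BAR2 aperture, or a
 * device-specific region registered via intel_vgpu_register_reg().
 */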
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
		break;
	default:
		if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions)
			return -EINVAL;

		index -= VFIO_PCI_NUM_REGIONS;
		return vgpu->vdev.region[index].ops->rw(vgpu, buf, count,
				ppos, is_write);
	}

	return ret == 0 ? count : ret;
}
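
/*
 * Return true when the access targets the GGTT entry range inside BAR0;
 * the read/write paths below let such accesses go through as full 8-byte
 * transfers.
 */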
static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	struct intel_gvt *gvt = vgpu->gvt;
	int offset;

	/* Only allow MMIO GGTT entry access */
	if (index != PCI_BASE_ADDRESS_0)
		return false;

	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);

	return (offset >= gvt->device_info.gtt_start_offset &&
		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
			true : false;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes read */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes write */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}
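
/*
 * mmap handler: only BAR2 (the graphics aperture) may be mapped, and only
 * as a shared mapping; it is remapped into userspace starting at the
 * vGPU's aperture physical base.
 */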
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff = 0;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
		intel_vgpu_release_msi_eventfd_ctx(vgpu);

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS +
				vgpu->vdev.num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header, sizeof(*sparse) +
					(sparse->nr_areas *
						sizeof(*sparse->areas)));
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
		struct vfio_device_gfx_plane_info dmabuf;
		int ret = 0;

		minsz = offsetofend(struct vfio_device_gfx_plane_info,
				    dmabuf_id);
		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
			return -EFAULT;
		if (dmabuf.argsz < minsz)
			return -EINVAL;

		ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
		if (ret != 0)
			return ret;

		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
								-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
		__u32 dmabuf_id;
		__s32 dmabuf_fd;

		if (get_user(dmabuf_id, (__u32 __user *)arg))
			return -EFAULT;

		dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
		return dmabuf_fd;

	}

	return -ENOTTY;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static ssize_t
hw_id_show(struct device *dev, struct device_attribute *attr,
	   char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%u\n",
			       vgpu->submission.shadow_ctx->hw_id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);
static DEVICE_ATTR_RO(hw_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	&dev_attr_hw_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name  = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static struct mdev_parent_ops intel_vgpu_ops = {
	.mdev_attr_groups	= intel_vgpu_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open			= intel_vgpu_open,
	.release		= intel_vgpu_release,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	struct attribute **kvm_type_attrs;
	struct attribute_group **kvm_vgpu_type_groups;

	intel_gvt_ops = ops;
	if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs,
			&kvm_vgpu_type_groups))
		return -EFAULT;
	intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	mdev_unregister_device(dev);
}
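
/*
 * Write-protect a guest page: register the gfn with KVM page tracking so
 * guest writes trap into kvmgt_page_track_write(), and remember the gfn in
 * the protect table.
 */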
static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
						     (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);
	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	mutex_init(&vgpu->dmabuf_lock);
	init_completion(&vgpu->vblank_done);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	info->debugfs_cache_entries = debugfs_create_ulong(
						"kvmgt_nr_cache_entries",
						0444, vgpu->debugfs,
						&vgpu->vdev.nr_cache_entries);
	if (!info->debugfs_cache_entries)
		gvt_vgpu_err("Cannot create kvmgt debugfs entry\n");

	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	debugfs_remove(info->debugfs_cache_entries);

	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	/*
	 * When the guest powers off, msi_trigger is set to NULL, but the
	 * vGPU's config space and MMIO registers are not restored to their
	 * defaults.  If the vGPU is reused by the next VM, one of its pipes
	 * may still be enabled, so the vGPU will receive vblank interrupt
	 * requests as soon as it becomes active.  msi_trigger stays NULL
	 * until the guest enables MSI, so in that case return success
	 * without injecting an interrupt into the guest.
	 */
	if (vgpu->vdev.msi_trigger == NULL)
		return 0;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}

static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	kvm_pfn_t pfn;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;

	pfn = gfn_to_pfn(info->kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		return INTEL_GVT_INVALID_ADDR;

	return pfn;
}

int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
		dma_addr_t *dma_addr)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct gvt_dma *entry;
	int ret;

	if (!handle_valid(handle))
		return -EINVAL;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	mutex_lock(&info->vgpu->vdev.cache_lock);

	entry = __gvt_cache_find_gfn(info->vgpu, gfn);
	if (!entry) {
		ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
		if (ret)
			goto err_unmap;
	} else {
		kref_get(&entry->ref);
		*dma_addr = entry->dma_addr;
	}

	mutex_unlock(&info->vgpu->vdev.cache_lock);
	return 0;

err_unmap:
	gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
err_unlock:
	mutex_unlock(&info->vgpu->vdev.cache_lock);
	return ret;
}

static void __gvt_dma_release(struct kref *ref)
{
	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);

	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
	__gvt_cache_remove_entry(entry->vgpu, entry);
}

void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
	struct kvmgt_guest_info *info;
	struct gvt_dma *entry;

	if (!handle_valid(handle))
		return;

	info = (struct kvmgt_guest_info *)handle;

	mutex_lock(&info->vgpu->vdev.cache_lock);
	entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
	if (entry)
		kref_put(&entry->ref, __gvt_dma_release);
	mutex_unlock(&info->vgpu->vdev.cache_lock);
}
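
/*
 * Copy to or from guest physical memory via KVM.  When called from a
 * kernel thread (current->mm == NULL), temporarily adopt the guest's mm
 * with use_mm() so kvm_read_guest()/kvm_write_guest() can work.
 */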
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;

	if (!handle_valid(handle))
		return false;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	return kvm_is_visible_gfn(kvm, gfn);
}

struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.enable_page_track = kvmgt_page_track_add,
	.disable_page_track = kvmgt_page_track_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
	.dma_map_guest_page = kvmgt_dma_map_guest_page,
	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
	.set_opregion = kvmgt_set_opregion,
	.get_vfio_device = kvmgt_get_vfio_device,
	.put_vfio_device = kvmgt_put_vfio_device,
	.is_valid_gfn = kvmgt_is_valid_gfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");