
Merge tag 'gvt-next-kvmgt-framework' of https://github.com/01org/gvt-linux into drm-intel-next-queued

Zhenyu Wang writes:

gvt-next-kvmgt-framework

This adds the initial KVMGT framework, based on the GVT-g MPT (Mediated Pass-Through) interface.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Daniel Vetter, 8 years ago (commit 8be8f4a9a9)
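
For orientation before the diff: the MPT (mediated pass-through) layer is a table of hypervisor hooks, struct intel_gvt_mpt (see hypercall.h below), which a backend module fills in and exports; the GVT-g core resolves it at init time via symbol_get(), as the gvt.c hunk does for kvmgt_mpt. The sketch below is illustrative only and not part of the patch; the "examplegt" names are made up, and only the hooks given an explicit NULL check in mpt.h are actually optional.

/*
 * Illustrative sketch, not part of this patch: the rough shape of an MPT
 * backend module under this framework. Names prefixed "examplegt_" are
 * hypothetical. Hooks without a NULL check in mpt.h (read_gpa, write_gpa,
 * gfn_to_mfn, set_wp_page, ...) remain mandatory for a working backend
 * and are elided here only for brevity.
 */
#include <linux/device.h>
#include <linux/module.h>

#include "i915_drv.h"
#include "gvt.h"

static int examplegt_detect_host(void)
{
	return 0;		/* report a usable host environment */
}

static int examplegt_host_init(struct device *dev, void *gvt, const void *ops)
{
	/* stash the core's intel_gvt_ops for later vGPU lifecycle calls */
	return 0;
}

static void examplegt_host_exit(struct device *dev, void *gvt)
{
}

struct intel_gvt_mpt examplegt_mpt = {
	.detect_host = examplegt_detect_host,
	.host_init   = examplegt_host_init,
	.host_exit   = examplegt_host_exit,
	/* remaining hooks elided; see kvmgt_mpt at the end of kvmgt.c */
};
EXPORT_SYMBOL_GPL(examplegt_mpt);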

drivers/gpu/drm/i915/Kconfig | +9 -0

@@ -107,6 +107,15 @@ config DRM_I915_GVT
 
 	  If in doubt, say "N".
 
+config DRM_I915_GVT_KVMGT
+	tristate "Enable KVM/VFIO support for Intel GVT-g"
+	depends on DRM_I915_GVT
+	depends on KVM
+	default n
+	help
+	  Choose this option if you want to enable KVMGT support for
+	  Intel GVT-g.
+
 menu "drm/i915 Debugging"
 depends on DRM_I915
 depends on EXPERT

drivers/gpu/drm/i915/gvt/Makefile | +5 -2

@@ -3,5 +3,8 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
 	interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \
 	execlist.o scheduler.o sched_policy.o render.o cmd_parser.o
 
-ccflags-y                      += -I$(src) -I$(src)/$(GVT_DIR) -Wall
-i915-y			       += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
+ccflags-y				+= -I$(src) -I$(src)/$(GVT_DIR) -Wall
+i915-y					+= $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
+
+CFLAGS_kvmgt.o				:= -Wno-unused-function
+obj-$(CONFIG_DRM_I915_GVT_KVMGT)	+= $(GVT_DIR)/kvmgt.o

drivers/gpu/drm/i915/gvt/cfg_space.c | +4 -8

@@ -47,11 +47,9 @@ enum {
  * Returns:
  * Zero on success, negative error code if failed.
  */
-int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset,
+int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 	void *p_data, unsigned int bytes)
 {
-	struct intel_vgpu *vgpu = __vgpu;
-
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;
 
@@ -82,9 +80,8 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map)
 
 	ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn,
 						  first_mfn,
-						  vgpu_aperture_sz(vgpu)
-						  >> PAGE_SHIFT, map,
-						  GVT_MAP_APERTURE);
+						  vgpu_aperture_sz(vgpu) >>
+						  PAGE_SHIFT, map);
 	if (ret)
 		return ret;
 
@@ -235,10 +232,9 @@ static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
  * Returns:
  * Zero on success, negative error code if failed.
  */
-int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset,
+int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 	void *p_data, unsigned int bytes)
 {
-	struct intel_vgpu *vgpu = __vgpu;
 	int ret;
 
 	if (WARN_ON(bytes > 4))

drivers/gpu/drm/i915/gvt/gtt.c | +17 -34

@@ -974,7 +974,7 @@ fail:
 }
 
 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
-		struct intel_gvt_gtt_entry *we, unsigned long index)
+		unsigned long index)
 {
 	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
 	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
@@ -983,25 +983,26 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 	struct intel_gvt_gtt_entry e;
 	int ret;
 
-	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type,
-		we->val64, index);
-
 	ppgtt_get_shadow_entry(spt, &e, index);
+
+	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64,
+			 index);
+
 	if (!ops->test_present(&e))
 		return 0;
 
 	if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
 		return 0;
 
-	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
-		struct intel_vgpu_guest_page *g =
-			intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
-		if (!g) {
+	if (gtt_type_is_pt(get_next_pt_type(e.type))) {
+		struct intel_vgpu_ppgtt_spt *s =
+			ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
+		if (!s) {
 			gvt_err("fail to find guest page\n");
 			ret = -ENXIO;
 			goto fail;
 		}
-		ret = ppgtt_invalidate_shadow_page(guest_page_to_ppgtt_spt(g));
+		ret = ppgtt_invalidate_shadow_page(s);
 		if (ret)
 			goto fail;
 	}
@@ -1010,7 +1011,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 	return 0;
 fail:
 	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
-			vgpu->id, spt, we->val64, we->type);
+			vgpu->id, spt, e.val64, e.type);
 	return ret;
 }
 
@@ -1231,23 +1232,16 @@ static int ppgtt_handle_guest_write_page_table(
 	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
 	struct intel_vgpu *vgpu = spt->vgpu;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
-	struct intel_gvt_gtt_entry ge;
 
-	int old_present, new_present;
 	int ret;
+	int new_present;
 
-	ppgtt_get_guest_entry(spt, &ge, index);
-
-	old_present = ops->test_present(&ge);
 	new_present = ops->test_present(we);
 
-	ppgtt_set_guest_entry(spt, we, index);
+	ret = ppgtt_handle_guest_entry_removal(gpt, index);
+	if (ret)
+		goto fail;
 
-	if (old_present) {
-		ret = ppgtt_handle_guest_entry_removal(gpt, &ge, index);
-		if (ret)
-			goto fail;
-	}
 	if (new_present) {
 		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
 		if (ret)
@@ -1293,7 +1287,7 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
 {
 	struct list_head *pos, *n;
 	struct intel_vgpu_ppgtt_spt *spt;
-	struct intel_gvt_gtt_entry ge, e;
+	struct intel_gvt_gtt_entry ge;
 	unsigned long index;
 	int ret;
 
@@ -1304,9 +1298,6 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
 		for_each_set_bit(index, spt->post_shadow_bitmap,
 				GTT_ENTRY_NUM_IN_ONE_PAGE) {
 			ppgtt_get_guest_entry(spt, &ge, index);
-			e = ge;
-			e.val64 = 0;
-			ppgtt_set_guest_entry(spt, &e, index);
 
 			ret = ppgtt_handle_guest_write_page_table(
 					&spt->guest_page, &ge, index);
@@ -1334,8 +1325,6 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
 
 	ppgtt_get_guest_entry(spt, &we, index);
-	memcpy((void *)&we.val64 + (pa & (info->gtt_entry_size - 1)),
-			p_data, bytes);
 
 	ops->test_pse(&we);
 
@@ -1344,19 +1333,13 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 		if (ret)
 			return ret;
 	} else {
-		struct intel_gvt_gtt_entry ge;
-
-		ppgtt_get_guest_entry(spt, &ge, index);
-
 		if (!test_bit(index, spt->post_shadow_bitmap)) {
-			ret = ppgtt_handle_guest_entry_removal(gpt,
-					&ge, index);
+			ret = ppgtt_handle_guest_entry_removal(gpt, index);
 			if (ret)
 				return ret;
 		}
 
 		ppgtt_set_post_shadow(spt, index);
-		ppgtt_set_guest_entry(spt, &we, index);
 	}
 
 	if (!enable_out_of_sync)

drivers/gpu/drm/i915/gvt/gvt.c | +26 -3

@@ -44,11 +44,14 @@ static const char * const supported_hypervisors[] = {
 	[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
 };
 
-struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops = {
+static const struct intel_gvt_ops intel_gvt_ops = {
 	.emulate_cfg_read = intel_vgpu_emulate_cfg_read,
 	.emulate_cfg_write = intel_vgpu_emulate_cfg_write,
 	.emulate_mmio_read = intel_vgpu_emulate_mmio_read,
 	.emulate_mmio_write = intel_vgpu_emulate_mmio_write,
+	.vgpu_create = intel_gvt_create_vgpu,
+	.vgpu_destroy = intel_gvt_destroy_vgpu,
+	.vgpu_reset = intel_gvt_reset_vgpu,
 };
 
 /**
@@ -81,10 +84,12 @@ int intel_gvt_init_host(void)
 				symbol_get(xengt_mpt), "xengt");
 		intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN;
 	} else {
+#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
 		/* not in Xen. Try KVMGT */
 		intel_gvt_host.mpt = try_then_request_module(
-				symbol_get(kvmgt_mpt), "kvm");
+				symbol_get(kvmgt_mpt), "kvmgt");
 		intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM;
+#endif
 	}
 
 	/* Fail to load MPT modules - bail out */
@@ -193,6 +198,9 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
 	intel_gvt_clean_mmio_info(gvt);
 	intel_gvt_free_firmware(gvt);
 
+	intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt);
+	intel_gvt_clean_vgpu_types(gvt);
+
 	kfree(dev_priv->gvt);
 	dev_priv->gvt = NULL;
 }
@@ -270,10 +278,25 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto out_clean_cmd_parser;
 
-	gvt_dbg_core("gvt device creation is done\n");
+	ret = intel_gvt_init_vgpu_types(gvt);
+	if (ret)
+		goto out_clean_thread;
+
+	ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt,
+				&intel_gvt_ops);
+	if (ret) {
+		gvt_err("failed to register gvt-g host device: %d\n", ret);
+		goto out_clean_types;
+	}
+
+	gvt_dbg_core("gvt device initialization is done\n");
 	dev_priv->gvt = gvt;
 	return 0;
 
+out_clean_types:
+	intel_gvt_clean_vgpu_types(gvt);
+out_clean_thread:
+	clean_service_thread(gvt);
 out_clean_cmd_parser:
 	intel_gvt_clean_cmd_parser(gvt);
 out_clean_sched_policy:

drivers/gpu/drm/i915/gvt/gvt.h | +58 -5

@@ -161,6 +161,20 @@ struct intel_vgpu {
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
 	struct notifier_block shadow_ctx_notifier_block;
+
+#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
+	struct {
+		struct device *mdev;
+		struct vfio_region *region;
+		int num_regions;
+		struct eventfd_ctx *intx_trigger;
+		struct eventfd_ctx *msi_trigger;
+		struct rb_root cache;
+		struct mutex cache_lock;
+		void *vfio_group;
+		struct notifier_block iommu_notifier;
+	} vdev;
+#endif
 };
 
 struct intel_gvt_gm {
@@ -190,6 +204,16 @@ struct intel_gvt_opregion {
 	u32 opregion_pa;
 };
 
+#define NR_MAX_INTEL_VGPU_TYPES 20
+struct intel_vgpu_type {
+	char name[16];
+	unsigned int max_instance;
+	unsigned int avail_instance;
+	unsigned int low_gm_size;
+	unsigned int high_gm_size;
+	unsigned int fence;
+};
+
 struct intel_gvt {
 	struct mutex lock;
 	struct drm_i915_private *dev_priv;
@@ -205,6 +229,8 @@ struct intel_gvt {
 	struct intel_gvt_opregion opregion;
 	struct intel_gvt_workload_scheduler scheduler;
 	DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
+	struct intel_vgpu_type *types;
+	unsigned int num_types;
 
 	struct task_struct *service_thread;
 	wait_queue_head_t service_thread_wq;
@@ -230,6 +256,14 @@ static inline void intel_gvt_request_service(struct intel_gvt *gvt,
 void intel_gvt_free_firmware(struct intel_gvt *gvt);
 int intel_gvt_load_firmware(struct intel_gvt *gvt);
 
+/* Aperture/GM space definitions for GVT device */
+#define MB_TO_BYTES(mb) ((mb) << 20ULL)
+#define BYTES_TO_MB(b) ((b) >> 20ULL)
+
+#define HOST_LOW_GM_SIZE MB_TO_BYTES(128)
+#define HOST_HIGH_GM_SIZE MB_TO_BYTES(384)
+#define HOST_FENCE 4
+
 /* Aperture/GM space definitions for GVT device */
 #define gvt_aperture_sz(gvt)	  (gvt->dev_priv->ggtt.mappable_end)
 #define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base)
@@ -330,11 +364,14 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
 	}
 }
 
-struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
-					 struct intel_vgpu_creation_params *
-					 param);
+int intel_gvt_init_vgpu_types(struct intel_gvt *gvt);
+void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
 
+struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
+					 struct intel_vgpu_type *type);
 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
+
 
 /* validating GM functions */
 #define vgpu_gmadr_is_aperture(vgpu, gmadr) \
@@ -369,10 +406,10 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 			     unsigned long *g_index);
 
-int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset,
+int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes);
 
-int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset,
+int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes);
 
 void intel_gvt_clean_opregion(struct intel_gvt *gvt);
@@ -385,6 +422,22 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
 int setup_vgpu_mmio(struct intel_vgpu *vgpu);
 void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
+struct intel_gvt_ops {
+	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
+				unsigned int);
+	int (*emulate_cfg_write)(struct intel_vgpu *, unsigned int, void *,
+				unsigned int);
+	int (*emulate_mmio_read)(struct intel_vgpu *, u64, void *,
+				unsigned int);
+	int (*emulate_mmio_write)(struct intel_vgpu *, u64, void *,
+				unsigned int);
+	struct intel_vgpu *(*vgpu_create)(struct intel_gvt *,
+				struct intel_vgpu_type *);
+	void (*vgpu_destroy)(struct intel_vgpu *);
+	void (*vgpu_reset)(struct intel_vgpu *);
+};
+
+
 #include "mpt.h"
 
 #endif
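
The new intel_gvt_ops table and per-device type list above are what a management path is expected to drive once the mdev layer lands (kvmgt_host_init() below still returns -ENODEV for that reason). A hypothetical sketch of that call sequence, using only symbols added in this hunk; the helper name and the -EBUSY policy are assumptions, and the ops/gvt pointers are taken to have been saved from an earlier host_init() call:

/*
 * Hypothetical consumer of the new ops table, not part of this patch.
 * 'name' is a requested vGPU type such as "GVTg_V4_4"; the lookup is
 * simplified compared to intel_gvt_find_vgpu_type() in kvmgt.c, which
 * also strips a driver-name prefix from mdev type names.
 */
#include <linux/err.h>
#include <linux/string.h>

#include "gvt.h"

static struct intel_vgpu *example_create_by_type(struct intel_gvt *gvt,
		const struct intel_gvt_ops *ops, const char *name)
{
	unsigned int i;

	for (i = 0; i < gvt->num_types; i++) {
		struct intel_vgpu_type *type = &gvt->types[i];

		if (strncmp(type->name, name, sizeof(type->name)))
			continue;
		if (!type->avail_instance)
			return ERR_PTR(-EBUSY);

		/* may itself return an ERR_PTR(), see vgpu.c below */
		return ops->vgpu_create(gvt, type);
	}

	return ERR_PTR(-EINVAL);
}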

drivers/gpu/drm/i915/gvt/hypercall.h | +3 -11

@@ -33,21 +33,14 @@
 #ifndef _GVT_HYPERCALL_H_
 #define _GVT_HYPERCALL_H_
 
-struct intel_gvt_io_emulation_ops {
-	int (*emulate_cfg_read)(void *, unsigned int, void *, unsigned int);
-	int (*emulate_cfg_write)(void *, unsigned int, void *, unsigned int);
-	int (*emulate_mmio_read)(void *, u64, void *, unsigned int);
-	int (*emulate_mmio_write)(void *, u64, void *, unsigned int);
-};
-
-extern struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops;
-
 /*
  * Specific GVT-g MPT modules function collections. Currently GVT-g supports
  * both Xen and KVM by providing dedicated hypervisor-related MPT modules.
  */
 struct intel_gvt_mpt {
 	int (*detect_host)(void);
+	int (*host_init)(struct device *dev, void *gvt, const void *ops);
+	void (*host_exit)(struct device *dev, void *gvt);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
 	void (*detach_vgpu)(unsigned long handle);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
@@ -60,8 +53,7 @@ struct intel_gvt_mpt {
 			 unsigned long len);
 	unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
 	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
-			      unsigned long mfn, unsigned int nr, bool map,
-			      int type);
+			      unsigned long mfn, unsigned int nr, bool map);
 	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
 			     bool map);
 };

drivers/gpu/drm/i915/gvt/kvmgt.c | +601 -0

@@ -0,0 +1,601 @@
+/*
+ * KVMGT - the implementation of Intel mediated pass-through framework for KVM
+ *
+ * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Jike Song <jike.song@intel.com>
+ *    Xiaoguang Chen <xiaoguang.chen@intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/eventfd.h>
+#include <linux/uuid.h>
+#include <linux/kvm_host.h>
+#include <linux/vfio.h>
+
+#include "i915_drv.h"
+#include "gvt.h"
+
+#if IS_ENABLED(CONFIG_VFIO_MDEV)
+#include <linux/mdev.h>
+#else
+static inline long vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+			long npage, int prot, unsigned long *phys_pfn)
+{
+	return 0;
+}
+static inline long vfio_unpin_pages(struct device *dev, unsigned long *pfn,
+			long npage)
+{
+	return 0;
+}
+#endif
+
+static const struct intel_gvt_ops *intel_gvt_ops;
+
+
+/* helper macros copied from vfio-pci */
+#define VFIO_PCI_OFFSET_SHIFT   40
+#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+
+struct vfio_region {
+	u32				type;
+	u32				subtype;
+	size_t				size;
+	u32				flags;
+};
+
+struct kvmgt_pgfn {
+	gfn_t gfn;
+	struct hlist_node hnode;
+};
+
+struct kvmgt_guest_info {
+	struct kvm *kvm;
+	struct intel_vgpu *vgpu;
+	struct kvm_page_track_notifier_node track_node;
+#define NR_BKT (1 << 18)
+	struct hlist_head ptable[NR_BKT];
+#undef NR_BKT
+};
+
+struct gvt_dma {
+	struct rb_node node;
+	gfn_t gfn;
+	kvm_pfn_t pfn;
+};
+
+static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct rb_node *node = vgpu->vdev.cache.rb_node;
+	struct gvt_dma *ret = NULL;
+
+	while (node) {
+		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);
+
+		if (gfn < itr->gfn)
+			node = node->rb_left;
+		else if (gfn > itr->gfn)
+			node = node->rb_right;
+		else {
+			ret = itr;
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct gvt_dma *entry;
+
+	mutex_lock(&vgpu->vdev.cache_lock);
+	entry = __gvt_cache_find(vgpu, gfn);
+	mutex_unlock(&vgpu->vdev.cache_lock);
+
+	return entry == NULL ? 0 : entry->pfn;
+}
+
+static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
+{
+	struct gvt_dma *new, *itr;
+	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;
+
+	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
+	if (!new)
+		return;
+
+	new->gfn = gfn;
+	new->pfn = pfn;
+
+	mutex_lock(&vgpu->vdev.cache_lock);
+	while (*link) {
+		parent = *link;
+		itr = rb_entry(parent, struct gvt_dma, node);
+
+		if (gfn == itr->gfn)
+			goto out;
+		else if (gfn < itr->gfn)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &vgpu->vdev.cache);
+	mutex_unlock(&vgpu->vdev.cache_lock);
+	return;
+
+out:
+	mutex_unlock(&vgpu->vdev.cache_lock);
+	kfree(new);
+}
+
+static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
+				struct gvt_dma *entry)
+{
+	rb_erase(&entry->node, &vgpu->vdev.cache);
+	kfree(entry);
+}
+
+static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct device *dev = vgpu->vdev.mdev;
+	struct gvt_dma *this;
+	unsigned long pfn;
+
+	mutex_lock(&vgpu->vdev.cache_lock);
+	this  = __gvt_cache_find(vgpu, gfn);
+	if (!this) {
+		mutex_unlock(&vgpu->vdev.cache_lock);
+		return;
+	}
+
+	pfn = this->pfn;
+	WARN_ON((vfio_unpin_pages(dev, &pfn, 1) != 1));
+	__gvt_cache_remove_entry(vgpu, this);
+	mutex_unlock(&vgpu->vdev.cache_lock);
+}
+
+static void gvt_cache_init(struct intel_vgpu *vgpu)
+{
+	vgpu->vdev.cache = RB_ROOT;
+	mutex_init(&vgpu->vdev.cache_lock);
+}
+
+static void gvt_cache_destroy(struct intel_vgpu *vgpu)
+{
+	struct gvt_dma *dma;
+	struct rb_node *node = NULL;
+	struct device *dev = vgpu->vdev.mdev;
+	unsigned long pfn;
+
+	mutex_lock(&vgpu->vdev.cache_lock);
+	while ((node = rb_first(&vgpu->vdev.cache))) {
+		dma = rb_entry(node, struct gvt_dma, node);
+		pfn = dma->pfn;
+
+		vfio_unpin_pages(dev, &pfn, 1);
+		__gvt_cache_remove_entry(vgpu, dma);
+	}
+	mutex_unlock(&vgpu->vdev.cache_lock);
+}
+
+static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
+		const char *name)
+{
+	int i;
+	struct intel_vgpu_type *t;
+	const char *driver_name = dev_driver_string(
+			&gvt->dev_priv->drm.pdev->dev);
+
+	for (i = 0; i < gvt->num_types; i++) {
+		t = &gvt->types[i];
+		if (!strncmp(t->name, name + strlen(driver_name) + 1,
+			sizeof(t->name)))
+			return t;
+	}
+
+	return NULL;
+}
+
+static struct attribute *type_attrs[] = {
+	NULL,
+};
+
+static struct attribute_group *intel_vgpu_type_groups[] = {
+	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
+};
+
+static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i, j;
+	struct intel_vgpu_type *type;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		type = &gvt->types[i];
+
+		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
+		if (WARN_ON(!group))
+			goto unwind;
+
+		group->name = type->name;
+		group->attrs = type_attrs;
+		intel_vgpu_type_groups[i] = group;
+	}
+
+	return true;
+
+unwind:
+	for (j = 0; j < i; j++) {
+		group = intel_vgpu_type_groups[j];
+		kfree(group);
+	}
+
+	return false;
+}
+
+static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		group = intel_vgpu_type_groups[i];
+		kfree(group);
+	}
+}
+
+static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
+{
+	hash_init(info->ptable);
+}
+
+static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
+{
+	struct kvmgt_pgfn *p;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
+		hash_del(&p->hnode);
+		kfree(p);
+	}
+}
+
+static struct kvmgt_pgfn *
+__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p, *res = NULL;
+
+	hash_for_each_possible(info->ptable, p, hnode, gfn) {
+		if (gfn == p->gfn) {
+			res = p;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
+				gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	p = __kvmgt_protect_table_find(info, gfn);
+	return !!p;
+}
+
+static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	if (kvmgt_gfn_is_write_protected(info, gfn))
+		return;
+
+	p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
+	if (WARN(!p, "gfn: 0x%llx\n", gfn))
+		return;
+
+	p->gfn = gfn;
+	hash_add(info->ptable, &p->hnode, gfn);
+}
+
+static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
+				gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	p = __kvmgt_protect_table_find(info, gfn);
+	if (p) {
+		hash_del(&p->hnode);
+		kfree(p);
+	}
+}
+
+static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
+{
+	if (!intel_gvt_init_vgpu_type_groups(gvt))
+		return -EFAULT;
+
+	intel_gvt_ops = ops;
+
+	/* MDEV is not yet available */
+	return -ENODEV;
+}
+
+static void kvmgt_host_exit(struct device *dev, void *gvt)
+{
+	intel_gvt_cleanup_vgpu_type_groups(gvt);
+}
+
+static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
+{
+	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
+	struct kvm *kvm = info->kvm;
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slot = gfn_to_memslot(kvm, gfn);
+
+	spin_lock(&kvm->mmu_lock);
+
+	if (kvmgt_gfn_is_write_protected(info, gfn))
+		goto out;
+
+	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
+	kvmgt_protect_table_add(info, gfn);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+	return 0;
+}
+
+static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
+{
+	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
+	struct kvm *kvm = info->kvm;
+	struct kvm_memory_slot *slot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slot = gfn_to_memslot(kvm, gfn);
+
+	spin_lock(&kvm->mmu_lock);
+
+	if (!kvmgt_gfn_is_write_protected(info, gfn))
+		goto out;
+
+	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
+	kvmgt_protect_table_del(info, gfn);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+	return 0;
+}
+
+static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		const u8 *val, int len,
+		struct kvm_page_track_notifier_node *node)
+{
+	struct kvmgt_guest_info *info = container_of(node,
+					struct kvmgt_guest_info, track_node);
+
+	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
+		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
+					(void *)val, len);
+}
+
+static void kvmgt_page_track_flush_slot(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		struct kvm_page_track_notifier_node *node)
+{
+	int i;
+	gfn_t gfn;
+	struct kvmgt_guest_info *info = container_of(node,
+					struct kvmgt_guest_info, track_node);
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; i < slot->npages; i++) {
+		gfn = slot->base_gfn + i;
+		if (kvmgt_gfn_is_write_protected(info, gfn)) {
+			kvm_slot_page_track_remove_page(kvm, slot, gfn,
+						KVM_PAGE_TRACK_WRITE);
+			kvmgt_protect_table_del(info, gfn);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+static bool kvmgt_check_guest(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	char s[12];
+	unsigned int *i;
+
+	eax = KVM_CPUID_SIGNATURE;
+	ebx = ecx = edx = 0;
+
+	asm volatile ("cpuid"
+		      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+		      :
+		      : "cc", "memory");
+	i = (unsigned int *)s;
+	i[0] = ebx;
+	i[1] = ecx;
+	i[2] = edx;
+
+	return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
+}
+
+/**
+ * NOTE:
+ * It's actually impossible to check if we are running in KVM host,
+ * since the "KVM host" is simply native. So we only detect the guest here.
+ */
+static int kvmgt_detect_host(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	if (intel_iommu_gfx_mapped) {
+		gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
+		return -ENODEV;
+	}
+#endif
+	return kvmgt_check_guest() ? -ENODEV : 0;
+}
+
+static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
+{
+	/* nothing to do here */
+	return 0;
+}
+
+static void kvmgt_detach_vgpu(unsigned long handle)
+{
+	/* nothing to do here */
+}
+
+static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
+{
+	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
+	struct intel_vgpu *vgpu = info->vgpu;
+
+	if (vgpu->vdev.msi_trigger)
+		return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1;
+
+	return false;
+}
+
+static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
+{
+	unsigned long pfn;
+	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
+	int rc;
+
+	pfn = gvt_cache_find(info->vgpu, gfn);
+	if (pfn != 0)
+		return pfn;
+
+	rc = vfio_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
+				IOMMU_READ | IOMMU_WRITE, &pfn);
+	if (rc != 1) {
+		gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn);
+		return 0;
+	}
+
+	gvt_cache_add(info->vgpu, gfn, pfn);
+	return pfn;
+}
+
+static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa)
+{
+	unsigned long pfn;
+	gfn_t gfn = gpa_to_gfn(gpa);
+
+	pfn = kvmgt_gfn_to_pfn(handle, gfn);
+	if (!pfn)
+		return NULL;
+
+	return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa);
+}
+
+static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
+			void *buf, unsigned long len, bool write)
+{
+	void *hva = NULL;
+
+	hva = kvmgt_gpa_to_hva(handle, gpa);
+	if (!hva)
+		return -EFAULT;
+
+	if (write)
+		memcpy(hva, buf, len);
+	else
+		memcpy(buf, hva, len);
+
+	return 0;
+}
+
+static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
+			void *buf, unsigned long len)
+{
+	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
+}
+
+static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
+			void *buf, unsigned long len)
+{
+	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
+}
+
+static unsigned long kvmgt_virt_to_pfn(void *addr)
+{
+	return PFN_DOWN(__pa(addr));
+}
+
+struct intel_gvt_mpt kvmgt_mpt = {
+	.detect_host = kvmgt_detect_host,
+	.host_init = kvmgt_host_init,
+	.host_exit = kvmgt_host_exit,
+	.attach_vgpu = kvmgt_attach_vgpu,
+	.detach_vgpu = kvmgt_detach_vgpu,
+	.inject_msi = kvmgt_inject_msi,
+	.from_virt_to_mfn = kvmgt_virt_to_pfn,
+	.set_wp_page = kvmgt_write_protect_add,
+	.unset_wp_page = kvmgt_write_protect_remove,
+	.read_gpa = kvmgt_read_gpa,
+	.write_gpa = kvmgt_write_gpa,
+	.gfn_to_mfn = kvmgt_gfn_to_pfn,
+};
+EXPORT_SYMBOL_GPL(kvmgt_mpt);
+
+static int __init kvmgt_init(void)
+{
+	return 0;
+}
+
+static void __exit kvmgt_exit(void)
+{
+}
+
+module_init(kvmgt_init);
+module_exit(kvmgt_exit);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Intel Corporation");
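
A note on the opaque handle in the file above: every MPT hook identifies a guest by an unsigned long handle, which kvmgt.c casts to a struct kvmgt_guest_info *, yet kvmgt_attach_vgpu() deliberately leaves it unset because the mdev/VFIO plumbing that would supply a struct kvm is not merged yet. Below is a hypothetical sketch of how a later guest-attach path could bind that handle; the function name is made up, and the struct kvm would have to come from that future plumbing.

/*
 * Hypothetical follow-up wiring, not part of this patch. Binds the opaque
 * MPT handle to a kvmgt_guest_info once a struct kvm for the guest is
 * available, and registers the KVM page-track notifier that drives
 * kvmgt_page_track_write()/kvmgt_page_track_flush_slot() above.
 */
#include <linux/vmalloc.h>

static int example_guest_init(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct kvmgt_guest_info *info;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	info->vgpu = vgpu;
	info->kvm = kvm;

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	/* from here on, MPT calls identify this guest by the handle */
	vgpu->handle = (unsigned long)info;
	return 0;
}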

drivers/gpu/drm/i915/gvt/mmio.c | +2 -4

@@ -67,10 +67,9 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
  * Returns:
  * Zero on success, negative error code if failed
  */
-int intel_vgpu_emulate_mmio_read(void *__vgpu, uint64_t pa,
+int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
 		void *p_data, unsigned int bytes)
 {
-	struct intel_vgpu *vgpu = __vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_mmio_info *mmio;
 	unsigned int offset = 0;
@@ -179,10 +178,9 @@ err:
  * Returns:
  * Zero on success, negative error code if failed
  */
-int intel_vgpu_emulate_mmio_write(void *__vgpu, uint64_t pa,
+int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
 		void *p_data, unsigned int bytes)
 {
-	struct intel_vgpu *vgpu = __vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_mmio_info *mmio;
 	unsigned int offset = 0;

drivers/gpu/drm/i915/gvt/mmio.h | +5 -4

@@ -87,10 +87,11 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
 })
 
 int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa);
-int intel_vgpu_emulate_mmio_read(void *__vgpu, u64 pa, void *p_data,
-				 unsigned int bytes);
-int intel_vgpu_emulate_mmio_write(void *__vgpu, u64 pa, void *p_data,
-				  unsigned int bytes);
+
+int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa,
+				void *p_data, unsigned int bytes);
+int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa,
+				void *p_data, unsigned int bytes);
 bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt,
 				  unsigned int offset);
 bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset);

drivers/gpu/drm/i915/gvt/mpt.h | +47 -8

@@ -55,6 +55,35 @@ static inline int intel_gvt_hypervisor_detect_host(void)
 	return intel_gvt_host.mpt->detect_host();
 }
 
+/**
+ * intel_gvt_hypervisor_host_init - init GVT-g host side
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+static inline int intel_gvt_hypervisor_host_init(struct device *dev,
+			void *gvt, const void *ops)
+{
+	/* optional to provide */
+	if (!intel_gvt_host.mpt->host_init)
+		return 0;
+
+	return intel_gvt_host.mpt->host_init(dev, gvt, ops);
+}
+
+/**
+ * intel_gvt_hypervisor_host_exit - exit GVT-g host side
+ */
+static inline void intel_gvt_hypervisor_host_exit(struct device *dev,
+			void *gvt)
+{
+	/* optional to provide */
+	if (!intel_gvt_host.mpt->host_exit)
+		return;
+
+	intel_gvt_host.mpt->host_exit(dev, gvt);
+}
+
 /**
  * intel_gvt_hypervisor_attach_vgpu - call hypervisor to initialize vGPU
  * related stuffs inside hypervisor.
@@ -64,6 +93,10 @@ static inline int intel_gvt_hypervisor_detect_host(void)
  */
 static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu)
 {
+	/* optional to provide */
+	if (!intel_gvt_host.mpt->attach_vgpu)
+		return 0;
+
 	return intel_gvt_host.mpt->attach_vgpu(vgpu, &vgpu->handle);
 }
 
@@ -76,6 +109,10 @@ static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu)
  */
 static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu)
 {
+	/* optional to provide */
+	if (!intel_gvt_host.mpt->detach_vgpu)
+		return;
+
 	intel_gvt_host.mpt->detach_vgpu(vgpu->handle);
 }
 
@@ -224,11 +261,6 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
 	return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn);
 }
 
-enum {
-	GVT_MAP_APERTURE = 0,
-	GVT_MAP_OPREGION,
-};
-
 /**
  * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
  * @vgpu: a vGPU
@@ -236,7 +268,6 @@ enum {
  * @mfn: host PFN
  * @nr: amount of PFNs
  * @map: map or unmap
- * @type: map type
  *
  * Returns:
  * Zero on success, negative error code if failed.
@@ -244,10 +275,14 @@ enum {
 static inline int intel_gvt_hypervisor_map_gfn_to_mfn(
 		struct intel_vgpu *vgpu, unsigned long gfn,
 		unsigned long mfn, unsigned int nr,
-		bool map, int type)
+		bool map)
 {
+	/* a MPT implementation could have MMIO mapped elsewhere */
+	if (!intel_gvt_host.mpt->map_gfn_to_mfn)
+		return 0;
+
 	return intel_gvt_host.mpt->map_gfn_to_mfn(vgpu->handle, gfn, mfn, nr,
-						  map, type);
+						  map);
 }
 
 /**
@@ -263,6 +298,10 @@ static inline int intel_gvt_hypervisor_map_gfn_to_mfn(
 static inline int intel_gvt_hypervisor_set_trap_area(
 		struct intel_vgpu *vgpu, u64 start, u64 end, bool map)
 {
+	/* a MPT implementation could have MMIO trapped elsewhere */
+	if (!intel_gvt_host.mpt->set_trap_area)
+		return 0;
+
 	return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map);
 }
 

drivers/gpu/drm/i915/gvt/opregion.c | +5 -29

@@ -73,7 +73,7 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map)
 		}
 		ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
 				vgpu_opregion(vgpu)->gfn[i],
-				mfn, 1, map, GVT_MAP_OPREGION);
+				mfn, 1, map);
 		if (ret) {
 			gvt_err("fail to map GFN to MFN, errno: %d\n", ret);
 			return ret;
@@ -89,28 +89,18 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map)
  */
 void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu)
 {
-	int i;
-
 	gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id);
 
 	if (!vgpu_opregion(vgpu)->va)
 		return;
 
-	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) {
-		vunmap(vgpu_opregion(vgpu)->va);
-		for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) {
-			if (vgpu_opregion(vgpu)->pages[i]) {
-				put_page(vgpu_opregion(vgpu)->pages[i]);
-				vgpu_opregion(vgpu)->pages[i] = NULL;
-			}
-		}
-	} else {
+	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) {
 		map_vgpu_opregion(vgpu, false);
 		free_pages((unsigned long)vgpu_opregion(vgpu)->va,
 				INTEL_GVT_OPREGION_PORDER);
-	}
 
-	vgpu_opregion(vgpu)->va = NULL;
+		vgpu_opregion(vgpu)->va = NULL;
+	}
 }
 
 /**
@@ -137,22 +127,8 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa)
 		ret = map_vgpu_opregion(vgpu, true);
 		if (ret)
 			return ret;
-	} else {
-		gvt_dbg_core("emulate opregion from userspace\n");
-
-		/*
-		 * If opregion pages are not allocated from host kenrel,
-		 * most of the params are meaningless
-		 */
-		ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
-				0, /* not used */
-				0, /* not used */
-				2, /* not used */
-				1,
-				GVT_MAP_OPREGION);
-		if (ret)
-			return ret;
 	}
+
 	return 0;
 }
 

drivers/gpu/drm/i915/gvt/scheduler.c | +8 -8

@@ -89,15 +89,15 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		}
 
 		page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i);
-		dst = kmap_atomic(page);
+		dst = kmap(page);
 		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
 				GTT_PAGE_SIZE);
-		kunmap_atomic(dst);
+		kunmap(page);
 		i++;
 	}
 
 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap_atomic(page);
+	shadow_ring_context = kmap(page);
 
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
@@ -123,7 +123,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			sizeof(*shadow_ring_context),
 			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
-	kunmap_atomic(shadow_ring_context);
+	kunmap(page);
 	return 0;
 }
 
@@ -318,10 +318,10 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 		}
 
 		page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i);
-		src = kmap_atomic(page);
+		src = kmap(page);
 		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
 				GTT_PAGE_SIZE);
-		kunmap_atomic(src);
+		kunmap(page);
 		i++;
 	}
 
@@ -329,7 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
 
 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap_atomic(page);
+	shadow_ring_context = kmap(page);
 
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
@@ -347,7 +347,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 			sizeof(*shadow_ring_context),
 			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
-	kunmap_atomic(shadow_ring_context);
+	kunmap(page);
 }
 
 static void complete_current_workload(struct intel_gvt *gvt, int ring_id)

drivers/gpu/drm/i915/gvt/vgpu.c | +149 -20

@@ -132,6 +132,106 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 }
 
+/**
+ * intel_gvt_init_vgpu_types - initialize vGPU type list
+ * @gvt : GVT device
+ *
+ * Initialize vGPU type list based on available resource.
+ *
+ */
+int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
+{
+	unsigned int num_types;
+	unsigned int i, low_avail;
+	unsigned int min_low;
+
+	/* vGPU type name is defined as GVTg_Vx_y which contains
+	 * physical GPU generation type and 'y' means maximum vGPU
+	 * instances user can create on one physical GPU for this
+	 * type.
+	 *
+	 * Depend on physical SKU resource, might see vGPU types like
+	 * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create
+	 * different types of vGPU on same physical GPU depending on
+	 * available resource. Each vGPU type will have "avail_instance"
+	 * to indicate how many vGPU instance can be created for this
+	 * type.
+	 *
+	 * Currently use static size here as we init type earlier..
+	 */
+	low_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE;
+	num_types = 4;
+
+	gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type),
+			     GFP_KERNEL);
+	if (!gvt->types)
+		return -ENOMEM;
+
+	min_low = MB_TO_BYTES(32);
+	for (i = 0; i < num_types; ++i) {
+		if (low_avail / min_low == 0)
+			break;
+		gvt->types[i].low_gm_size = min_low;
+		gvt->types[i].high_gm_size = 3 * gvt->types[i].low_gm_size;
+		gvt->types[i].fence = 4;
+		gvt->types[i].max_instance = low_avail / min_low;
+		gvt->types[i].avail_instance = gvt->types[i].max_instance;
+
+		if (IS_GEN8(gvt->dev_priv))
+			sprintf(gvt->types[i].name, "GVTg_V4_%u",
+						gvt->types[i].max_instance);
+		else if (IS_GEN9(gvt->dev_priv))
+			sprintf(gvt->types[i].name, "GVTg_V5_%u",
+						gvt->types[i].max_instance);
+
+		min_low <<= 1;
+		gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n",
+			     i, gvt->types[i].name, gvt->types[i].max_instance,
+			     gvt->types[i].avail_instance,
+			     gvt->types[i].low_gm_size,
+			     gvt->types[i].high_gm_size, gvt->types[i].fence);
+	}
+
+	gvt->num_types = i;
+	return 0;
+}
+
+void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
+{
+	kfree(gvt->types);
+}
+
+static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
+{
+	int i;
+	unsigned int low_gm_avail, high_gm_avail, fence_avail;
+	unsigned int low_gm_min, high_gm_min, fence_min, total_min;
+
+	/* Need to depend on maximum hw resource size but keep on
+	 * static config for now.
+	 */
+	low_gm_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE -
+		gvt->gm.vgpu_allocated_low_gm_size;
+	high_gm_avail = MB_TO_BYTES(256) * 3 - HOST_HIGH_GM_SIZE -
+		gvt->gm.vgpu_allocated_high_gm_size;
+	fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
+		gvt->fence.vgpu_allocated_fence_num;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		low_gm_min = low_gm_avail / gvt->types[i].low_gm_size;
+		high_gm_min = high_gm_avail / gvt->types[i].high_gm_size;
+		fence_min = fence_avail / gvt->types[i].fence;
+		total_min = min(min(low_gm_min, high_gm_min), fence_min);
+		gvt->types[i].avail_instance = min(gvt->types[i].max_instance,
+						   total_min);
+
+		gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n",
+		       i, gvt->types[i].name, gvt->types[i].max_instance,
+		       gvt->types[i].avail_instance, gvt->types[i].low_gm_size,
+		       gvt->types[i].high_gm_size, gvt->types[i].fence);
+	}
+}
+
 /**
  * intel_gvt_destroy_vgpu - destroy a virtual GPU
  * @vgpu: virtual GPU
@@ -166,20 +266,11 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	clean_vgpu_mmio(vgpu);
 	vfree(vgpu);
 
+	intel_gvt_update_vgpu_types(gvt);
 	mutex_unlock(&gvt->lock);
 }
 
-/**
- * intel_gvt_create_vgpu - create a virtual GPU
- * @gvt: GVT device
- * @param: vGPU creation parameters
- *
- * This function is called when user wants to create a virtual GPU.
- *
- * Returns:
- * pointer to intel_vgpu, error pointer if failed.
- */
-struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
+static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 		struct intel_vgpu_creation_params *param)
 {
 	struct intel_vgpu *vgpu;
@@ -224,15 +315,9 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_detach_hypervisor_vgpu;
 
-	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) {
-		ret = intel_vgpu_init_opregion(vgpu, 0);
-		if (ret)
-			goto out_clean_gtt;
-	}
-
 	ret = intel_vgpu_init_display(vgpu);
 	if (ret)
-		goto out_clean_opregion;
+		goto out_clean_gtt;
 
 	ret = intel_vgpu_init_execlist(vgpu);
 	if (ret)
@@ -257,8 +342,6 @@ out_clean_execlist:
 	intel_vgpu_clean_execlist(vgpu);
 out_clean_display:
 	intel_vgpu_clean_display(vgpu);
-out_clean_opregion:
-	intel_vgpu_clean_opregion(vgpu);
 out_clean_gtt:
 	intel_vgpu_clean_gtt(vgpu);
 out_detach_hypervisor_vgpu:
@@ -272,3 +355,49 @@ out_free_vgpu:
 	mutex_unlock(&gvt->lock);
 	return ERR_PTR(ret);
 }
+
+/**
+ * intel_gvt_create_vgpu - create a virtual GPU
+ * @gvt: GVT device
+ * @type: type of the vGPU to create
+ *
+ * This function is called when user wants to create a virtual GPU.
+ *
+ * Returns:
+ * pointer to intel_vgpu, error pointer if failed.
+ */
+struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
+				struct intel_vgpu_type *type)
+{
+	struct intel_vgpu_creation_params param;
+	struct intel_vgpu *vgpu;
+
+	param.handle = 0;
+	param.low_gm_sz = type->low_gm_size;
+	param.high_gm_sz = type->high_gm_size;
+	param.fence_sz = type->fence;
+
+	/* XXX current param based on MB */
+	param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz);
+	param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz);
+
+	vgpu = __intel_gvt_create_vgpu(gvt, &param);
+	if (IS_ERR(vgpu))
+		return vgpu;
+
+	/* calculate left instance change for types */
+	intel_gvt_update_vgpu_types(gvt);
+
+	return vgpu;
+}
+
+/**
+ * intel_gvt_reset_vgpu - reset a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to reset a virtual GPU.
+ *
+ */
+void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu)
+{
+}