
drm/i915/gvt: vGPU PCI configuration space virtualization

This patch introduces vGPU PCI configuration space virtualization.

- Adjust the trapped GPFN (Guest Page Frame Number) window of the virtual GEN
PCI BAR 0 when the guest initializes the PCI BAR 0 address.

- Emulate the OpRegion when the guest accesses it.

- Pass through a part of the aperture to the guest when the guest initializes
the aperture BAR.
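
The BAR-related points above come down to the standard PCI sizing handshake that the
new cfg_space.c code answers for the guest. A minimal, stand-alone user-space sketch of
that handshake follows; it is not part of the patch, and the accessor names, the 16 MiB
size and the fake config space are assumptions for illustration only. In the real flow
these accesses are the trapped reads and writes handled by
intel_vgpu_emulate_cfg_read()/intel_vgpu_emulate_cfg_write().

#include <stdint.h>
#include <stdio.h>

/* Fake 256-byte config space, dword-indexed; stands in for the trapped
 * vGPU config space (hypothetical helpers, illustration only). */
static uint32_t cfg[64];
static const uint64_t BAR0_SIZE = 16 << 20;	/* assume a 16 MiB BAR 0 */

static uint32_t cfg_read32(unsigned int off)
{
	return cfg[off / 4];
}

static void cfg_write32(unsigned int off, uint32_t val)
{
	/* Sizing write: like the BAR write emulation, expose only the
	 * address bits the (emulated) BAR can decode. */
	if (off == 0x10 && val == 0xffffffff)
		val = (uint32_t)~(BAR0_SIZE - 1);
	cfg[off / 4] = val;
}

int main(void)
{
	const unsigned int BAR0 = 0x10;		/* PCI_BASE_ADDRESS_0 */
	uint32_t saved, mask;
	uint64_t size;

	cfg_write32(BAR0, 0xdf000000);		/* guest programs a base  */
	saved = cfg_read32(BAR0);		/* 1. save current value  */
	cfg_write32(BAR0, 0xffffffff);		/* 2. write all 1s        */
	mask = cfg_read32(BAR0);		/* 3. read back size mask */
	cfg_write32(BAR0, saved);		/* 4. restore old value   */

	size = (uint64_t)(uint32_t)~(mask & ~0xfu) + 1;
	printf("BAR0 size: 0x%llx bytes\n", (unsigned long long)size);
	return 0;
}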

Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Zhi Wang 9 years ago
parent
commit
4d60c5fd3f

+ 1 - 1
drivers/gpu/drm/i915/gvt/Makefile

@@ -1,6 +1,6 @@
 GVT_DIR := gvt
 GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
-	interrupt.o gtt.o
+	interrupt.o gtt.o cfg_space.o opregion.o
 
 ccflags-y                      += -I$(src) -I$(src)/$(GVT_DIR) -Wall
 i915-y			       += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))

+ 287 - 0
drivers/gpu/drm/i915/gvt/cfg_space.c

@@ -0,0 +1,287 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Jike Song <jike.song@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+
+enum {
+	INTEL_GVT_PCI_BAR_GTTMMIO = 0,
+	INTEL_GVT_PCI_BAR_APERTURE,
+	INTEL_GVT_PCI_BAR_PIO,
+	INTEL_GVT_PCI_BAR_MAX,
+};
+
+/**
+ * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	struct intel_vgpu *vgpu = __vgpu;
+
+	if (WARN_ON(bytes > 4))
+		return -EINVAL;
+
+	if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ))
+		return -EINVAL;
+
+	memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes);
+	return 0;
+}
+
+static int map_aperture(struct intel_vgpu *vgpu, bool map)
+{
+	u64 first_gfn, first_mfn;
+	u64 val;
+	int ret;
+
+	if (map == vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked)
+		return 0;
+
+	val = vgpu_cfg_space(vgpu)[PCI_BASE_ADDRESS_2];
+	if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+		val = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);
+	else
+		val = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);
+
+	first_gfn = (val + vgpu_aperture_offset(vgpu)) >> PAGE_SHIFT;
+	first_mfn = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;
+
+	ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn,
+						  first_mfn,
+						  vgpu_aperture_sz(vgpu)
+						  >> PAGE_SHIFT, map,
+						  GVT_MAP_APERTURE);
+	if (ret)
+		return ret;
+
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked = map;
+	return 0;
+}
+
+static int trap_gttmmio(struct intel_vgpu *vgpu, bool trap)
+{
+	u64 start, end;
+	u64 val;
+	int ret;
+
+	if (trap == vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].tracked)
+		return 0;
+
+	val = vgpu_cfg_space(vgpu)[PCI_BASE_ADDRESS_0];
+	if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+		start = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_0);
+	else
+		start = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_0);
+
+	start &= ~GENMASK(3, 0);
+	end = start + vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size - 1;
+
+	ret = intel_gvt_hypervisor_set_trap_area(vgpu, start, end, trap);
+	if (ret)
+		return ret;
+
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].tracked = trap;
+	return 0;
+}
+
+static int emulate_pci_command_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u8 old = vgpu_cfg_space(vgpu)[offset];
+	u8 new = *(u8 *)p_data;
+	u8 changed = old ^ new;
+	int ret;
+
+	if (!(changed & PCI_COMMAND_MEMORY))
+		return 0;
+
+	if (old & PCI_COMMAND_MEMORY) {
+		ret = trap_gttmmio(vgpu, false);
+		if (ret)
+			return ret;
+		ret = map_aperture(vgpu, false);
+		if (ret)
+			return ret;
+	} else {
+		ret = trap_gttmmio(vgpu, true);
+		if (ret)
+			return ret;
+		ret = map_aperture(vgpu, true);
+		if (ret)
+			return ret;
+	}
+
+	memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+	return 0;
+}
+
+static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	unsigned int bar_index =
+		(rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8;
+	u32 new = *(u32 *)(p_data);
+	bool lo = IS_ALIGNED(offset, 8);
+	u64 size;
+	int ret = 0;
+	bool mmio_enabled =
+		vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+
+	if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX))
+		return -EINVAL;
+
+	if (new == 0xffffffff) {
+		/*
+		 * Power-up software can determine how much address
+		 * space the device requires by writing a value of
+		 * all 1's to the register and then reading the value
+		 * back. The device will return 0's in all don't-care
+		 * address bits.
+		 */
+		size = vgpu->cfg_space.bar[bar_index].size;
+		if (lo) {
+			new = rounddown(new, size);
+		} else {
+			u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)];
+			/* for a 32-bit BAR the upper dword reads back as
+			 * all zeroes; for a 64-bit BAR the size is computed
+			 * from the lower 32 bits and the corresponding
+			 * upper bits are returned
+			 */
+			if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
+				new &= (~(size-1)) >> 32;
+			else
+				new = 0;
+		}
+		/*
+		 * Unmap & untrap the BAR, since the guest hasn't configured
+		 * a valid GPA
+		 */
+		switch (bar_index) {
+		case INTEL_GVT_PCI_BAR_GTTMMIO:
+			ret = trap_gttmmio(vgpu, false);
+			break;
+		case INTEL_GVT_PCI_BAR_APERTURE:
+			ret = map_aperture(vgpu, false);
+			break;
+		}
+		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+	} else {
+		/*
+		 * Unmap & untrap the old BAR first, since the guest has
+		 * re-configured the BAR
+		 */
+		switch (bar_index) {
+		case INTEL_GVT_PCI_BAR_GTTMMIO:
+			ret = trap_gttmmio(vgpu, false);
+			break;
+		case INTEL_GVT_PCI_BAR_APERTURE:
+			ret = map_aperture(vgpu, false);
+			break;
+		}
+		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+		/* Track the new BAR */
+		if (mmio_enabled) {
+			switch (bar_index) {
+			case INTEL_GVT_PCI_BAR_GTTMMIO:
+				ret = trap_gttmmio(vgpu, true);
+				break;
+			case INTEL_GVT_PCI_BAR_APERTURE:
+				ret = map_aperture(vgpu, true);
+				break;
+			}
+		}
+	}
+	return ret;
+}
+
+/**
+ * intel_vgpu_emulate_cfg_write - emulate vGPU configuration space write
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	struct intel_vgpu *vgpu = __vgpu;
+	int ret;
+
+	if (WARN_ON(bytes > 4))
+		return -EINVAL;
+
+	if (WARN_ON(offset + bytes >= INTEL_GVT_MAX_CFG_SPACE_SZ))
+		return -EINVAL;
+
+	/* First check if it's PCI_COMMAND */
+	if (IS_ALIGNED(offset, 2) && offset == PCI_COMMAND) {
+		if (WARN_ON(bytes > 2))
+			return -EINVAL;
+		return emulate_pci_command_write(vgpu, offset, p_data, bytes);
+	}
+
+	switch (rounddown(offset, 4)) {
+	case PCI_BASE_ADDRESS_0:
+	case PCI_BASE_ADDRESS_1:
+	case PCI_BASE_ADDRESS_2:
+	case PCI_BASE_ADDRESS_3:
+		if (WARN_ON(!IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		return emulate_pci_bar_write(vgpu, offset, p_data, bytes);
+
+	case INTEL_GVT_PCI_SWSCI:
+		if (WARN_ON(!IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		ret = intel_vgpu_emulate_opregion_request(vgpu, *(u32 *)p_data);
+		if (ret)
+			return ret;
+		break;
+
+	case INTEL_GVT_PCI_OPREGION:
+		if (WARN_ON(!IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		ret = intel_vgpu_init_opregion(vgpu, *(u32 *)p_data);
+		if (ret)
+			return ret;
+
+		memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+		break;
+	default:
+		memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes);
+		break;
+	}
+	return 0;
+}
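
The other interesting path in cfg_space.c is emulate_pci_command_write(): only
transitions of the PCI_COMMAND memory-decode bit matter, and a single transition drives
GTTMMIO trapping and aperture mapping together. A small stand-alone sketch of that
transition logic, with invented set_* helpers standing in for
trap_gttmmio()/map_aperture():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PCI_COMMAND_MEMORY	0x2

/* Invented stand-ins for trap_gttmmio() and map_aperture(). */
static void set_gttmmio_trap(bool on) { printf("GTTMMIO trap %s\n", on ? "on" : "off"); }
static void set_aperture_map(bool on) { printf("aperture map %s\n", on ? "on" : "off"); }

/* Mirrors the PCI_COMMAND transition handling above. */
static void command_write(uint8_t old, uint8_t new)
{
	if (!((old ^ new) & PCI_COMMAND_MEMORY))
		return;			/* memory decode unchanged */

	bool enable = new & PCI_COMMAND_MEMORY;

	set_gttmmio_trap(enable);
	set_aperture_map(enable);
}

int main(void)
{
	command_write(0x0, 0x2);	/* guest enables memory decode  */
	command_write(0x2, 0x2);	/* no transition, nothing done  */
	command_write(0x2, 0x0);	/* guest disables memory decode */
	return 0;
}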

+ 12 - 0
drivers/gpu/drm/i915/gvt/gvt.c

@@ -42,6 +42,11 @@ static const char * const supported_hypervisors[] = {
 	[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
 };
 
+struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops = {
+	.emulate_cfg_read = intel_vgpu_emulate_cfg_read,
+	.emulate_cfg_write = intel_vgpu_emulate_cfg_write,
+};
+
 /**
  * intel_gvt_init_host - Load MPT modules and detect if we're running in host
  * @gvt: intel gvt device
@@ -122,6 +127,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
 	if (WARN_ON(!gvt->initialized))
 		return;
 
+	intel_gvt_clean_opregion(gvt);
 	intel_gvt_clean_gtt(gvt);
 	intel_gvt_clean_irq(gvt);
 	intel_gvt_clean_mmio_info(gvt);
@@ -179,10 +185,16 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto out_clean_irq;
 
+	ret = intel_gvt_init_opregion(gvt);
+	if (ret)
+		goto out_clean_gtt;
+
 	gvt_dbg_core("gvt device creation is done\n");
 	gvt->initialized = true;
 	return 0;
 
+out_clean_gtt:
+	intel_gvt_clean_gtt(gvt);
 out_clean_irq:
 	intel_gvt_clean_irq(gvt);
 out_free_firmware:

+ 30 - 0
drivers/gpu/drm/i915/gvt/gvt.h

@@ -108,6 +108,14 @@ struct intel_vgpu_irq {
 	bool irq_warn_once[INTEL_GVT_EVENT_MAX];
 };
 
+struct intel_vgpu_opregion {
+	void *va;
+	u32 gfn[INTEL_GVT_OPREGION_PAGES];
+	struct page *pages[INTEL_GVT_OPREGION_PAGES];
+};
+
+#define vgpu_opregion(vgpu) (&(vgpu->opregion))
+
 struct intel_vgpu {
 	struct intel_gvt *gvt;
 	int id;
@@ -121,6 +129,7 @@ struct intel_vgpu {
 	struct intel_vgpu_mmio mmio;
 	struct intel_vgpu_irq irq;
 	struct intel_vgpu_gtt gtt;
+	struct intel_vgpu_opregion opregion;
 };
 
 struct intel_gvt_gm {
@@ -145,6 +154,11 @@ struct intel_gvt_firmware {
 	bool firmware_loaded;
 };
 
+struct intel_gvt_opregion {
+	void *opregion_va;
+	u32 opregion_pa;
+};
+
 struct intel_gvt {
 	struct mutex lock;
 	bool initialized;
@@ -159,6 +173,7 @@ struct intel_gvt {
 	struct intel_gvt_firmware firmware;
 	struct intel_gvt_irq irq;
 	struct intel_gvt_gtt gtt;
+	struct intel_gvt_opregion opregion;
 };
 
 void intel_gvt_free_firmware(struct intel_gvt *gvt);
@@ -300,6 +315,21 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
 			     unsigned long *h_index);
 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 			     unsigned long *g_index);
+
+int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes);
+
+int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes);
+
+void intel_gvt_clean_opregion(struct intel_gvt *gvt);
+int intel_gvt_init_opregion(struct intel_gvt *gvt);
+
+void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu);
+int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa);
+
+int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
+
 #include "mpt.h"
 
 #endif

+ 14 - 0
drivers/gpu/drm/i915/gvt/hypercall.h

@@ -33,6 +33,15 @@
 #ifndef _GVT_HYPERCALL_H_
 #define _GVT_HYPERCALL_H_
 
+struct intel_gvt_io_emulation_ops {
+	int (*emulate_cfg_read)(void *, unsigned int,
+				void *, unsigned int);
+	int (*emulate_cfg_write)(void *, unsigned int,
+				 void *, unsigned int);
+};
+
+extern struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops;
+
 /*
  * Specific GVT-g MPT modules function collections. Currently GVT-g supports
  * both Xen and KVM by providing dedicated hypervisor-related MPT modules.
@@ -50,6 +59,11 @@ struct intel_gvt_mpt {
 	int (*write_gpa)(unsigned long handle, unsigned long gpa, void *buf,
 			 unsigned long len);
 	unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
+	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
+			      unsigned long mfn, unsigned int nr, bool map,
+			      int type);
+	int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
+			     bool map);
 };
 
 extern struct intel_gvt_mpt xengt_mpt;

+ 42 - 0
drivers/gpu/drm/i915/gvt/mpt.h

@@ -224,4 +224,46 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
 	return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn);
 }
 
+enum {
+	GVT_MAP_APERTURE = 0,
+	GVT_MAP_OPREGION,
+};
+
+/**
+ * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
+ * @vgpu: a vGPU
+ * @gfn: guest PFN
+ * @mfn: host PFN
+ * @nr: number of PFNs
+ * @map: map or unmap
+ * @type: map type
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+static inline int intel_gvt_hypervisor_map_gfn_to_mfn(
+		struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long mfn, unsigned int nr,
+		bool map, int type)
+{
+	return intel_gvt_host.mpt->map_gfn_to_mfn(vgpu->handle, gfn, mfn, nr,
+						  map, type);
+}
+
+/**
+ * intel_gvt_hypervisor_set_trap_area - Trap a guest PA region
+ * @vgpu: a vGPU
+ * @start: the beginning of the guest physical address region
+ * @end: the end of the guest physical address region
+ * @map: map or unmap
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+static inline int intel_gvt_hypervisor_set_trap_area(
+		struct intel_vgpu *vgpu, u64 start, u64 end, bool map)
+{
+	return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map);
+}
+
 #endif /* _GVT_MPT_H_ */
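
The two MPT hooks introduced here carry a simple contract: map_gfn_to_mfn() installs or
removes a GFN-to-MFN range of a given type, and set_trap_area() installs or removes a
trapped guest physical range. Below is a stand-alone sketch of a backend filling them in;
every name and value in it is hypothetical, and a real Xen or KVM module would program
its P2M/EPT mappings and MMIO trap ranges instead of printing:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Trimmed-down copy of the two hooks added to struct intel_gvt_mpt. */
struct demo_mpt_ops {
	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
			      unsigned long mfn, unsigned int nr,
			      bool map, int type);
	int (*set_trap_area)(unsigned long handle, uint64_t start,
			     uint64_t end, bool map);
};

static int demo_map_gfn_to_mfn(unsigned long handle, unsigned long gfn,
			       unsigned long mfn, unsigned int nr,
			       bool map, int type)
{
	printf("%smap gfn 0x%lx -> mfn 0x%lx, %u page(s), type %d\n",
	       map ? "" : "un", gfn, mfn, nr, type);
	return 0;
}

static int demo_set_trap_area(unsigned long handle, uint64_t start,
			      uint64_t end, bool map)
{
	printf("%strap [0x%llx, 0x%llx]\n", map ? "" : "un",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}

static const struct demo_mpt_ops demo_mpt = {
	.map_gfn_to_mfn	= demo_map_gfn_to_mfn,
	.set_trap_area	= demo_set_trap_area,
};

int main(void)
{
	/* e.g. map 16 aperture pages, then trap a 16 MiB GTTMMIO BAR. */
	demo_mpt.map_gfn_to_mfn(0, 0x80000, 0x40000, 16, true, 0);
	demo_mpt.set_trap_area(0, 0xf0000000, 0xf0ffffff, true);
	return 0;
}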

+ 343 - 0
drivers/gpu/drm/i915/gvt/opregion.c

@@ -0,0 +1,343 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include "i915_drv.h"
+
+static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa)
+{
+	void *host_va = vgpu->gvt->opregion.opregion_va;
+	u8 *buf;
+	int i;
+
+	if (WARN((vgpu_opregion(vgpu)->va),
+			"vgpu%d: opregion has been initialized already.\n",
+			vgpu->id))
+		return -EINVAL;
+
+	vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_ATOMIC |
+			GFP_DMA32 | __GFP_ZERO,
+			INTEL_GVT_OPREGION_PORDER);
+
+	if (!vgpu_opregion(vgpu)->va)
+		return -ENOMEM;
+
+	memcpy_fromio(vgpu_opregion(vgpu)->va, host_va,
+			INTEL_GVT_OPREGION_SIZE);
+
+	for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++)
+		vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i;
+
+	/* For an unknown reason, the value in the LID field is incorrect,
+	 * which blocks the Windows guest; work around it by forcing it
+	 * to "OPEN"
+	 */
+	buf = (u8 *)vgpu_opregion(vgpu)->va;
+	buf[INTEL_GVT_OPREGION_CLID] = 0x3;
+
+	return 0;
+}
+
+static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map)
+{
+	u64 mfn;
+	int i, ret;
+
+	for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) {
+		mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va
+			+ i * PAGE_SIZE);
+		if (mfn == INTEL_GVT_INVALID_ADDR) {
+			gvt_err("fail to get MFN from VA\n");
+			return -EINVAL;
+		}
+		ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
+				vgpu_opregion(vgpu)->gfn[i],
+				mfn, 1, map, GVT_MAP_OPREGION);
+		if (ret) {
+			gvt_err("fail to map GFN to MFN, errno: %d\n", ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion
+ * @vgpu: a vGPU
+ *
+ */
+void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu)
+{
+	int i;
+
+	gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id);
+
+	if (!vgpu_opregion(vgpu)->va)
+		return;
+
+	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) {
+		vunmap(vgpu_opregion(vgpu)->va);
+		for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) {
+			if (vgpu_opregion(vgpu)->pages[i]) {
+				put_page(vgpu_opregion(vgpu)->pages[i]);
+				vgpu_opregion(vgpu)->pages[i] = NULL;
+			}
+		}
+	} else {
+		map_vgpu_opregion(vgpu, false);
+		free_pages((unsigned long)vgpu_opregion(vgpu)->va,
+				INTEL_GVT_OPREGION_PORDER);
+	}
+
+	vgpu_opregion(vgpu)->va = NULL;
+}
+
+/**
+ * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion
+ * @vgpu: a vGPU
+ * @gpa: guest physical address of opregion
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa)
+{
+	int ret;
+
+	gvt_dbg_core("vgpu%d: init vgpu opregion\n", vgpu->id);
+
+	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) {
+		gvt_dbg_core("emulate opregion from kernel\n");
+
+		ret = init_vgpu_opregion(vgpu, gpa);
+		if (ret)
+			return ret;
+
+		ret = map_vgpu_opregion(vgpu, true);
+		if (ret)
+			return ret;
+	} else {
+		gvt_dbg_core("emulate opregion from userspace\n");
+
+		/*
+		 * If the opregion pages are not allocated from the host
+		 * kernel, most of the params are meaningless
+		 */
+		ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu,
+				0, /* not used */
+				0, /* not used */
+				2, /* not used */
+				1,
+				GVT_MAP_OPREGION);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/**
+ * intel_gvt_clean_opregion - clean host opregion related stuff
+ * @gvt: a GVT device
+ *
+ */
+void intel_gvt_clean_opregion(struct intel_gvt *gvt)
+{
+	iounmap(gvt->opregion.opregion_va);
+	gvt->opregion.opregion_va = NULL;
+}
+
+/**
+ * intel_gvt_init_opregion - initialize host opregion related stuff
+ * @gvt: a GVT device
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_gvt_init_opregion(struct intel_gvt *gvt)
+{
+	gvt_dbg_core("init host opregion\n");
+
+	pci_read_config_dword(gvt->dev_priv->drm.pdev, INTEL_GVT_PCI_OPREGION,
+			&gvt->opregion.opregion_pa);
+
+	gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa,
+			INTEL_GVT_OPREGION_SIZE);
+	if (!gvt->opregion.opregion_va) {
+		gvt_err("fail to map host opregion\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+#define GVT_OPREGION_FUNC(scic)					\
+	({							\
+	 u32 __ret;						\
+	 __ret = (scic & OPREGION_SCIC_FUNC_MASK) >>		\
+	 OPREGION_SCIC_FUNC_SHIFT;				\
+	 __ret;							\
+	 })
+
+#define GVT_OPREGION_SUBFUNC(scic)				\
+	({							\
+	 u32 __ret;						\
+	 __ret = (scic & OPREGION_SCIC_SUBFUNC_MASK) >>		\
+	 OPREGION_SCIC_SUBFUNC_SHIFT;				\
+	 __ret;							\
+	 })
+
+static const char *opregion_func_name(u32 func)
+{
+	const char *name = NULL;
+
+	switch (func) {
+	case 0 ... 3:
+	case 5:
+	case 7 ... 15:
+		name = "Reserved";
+		break;
+
+	case 4:
+		name = "Get BIOS Data";
+		break;
+
+	case 6:
+		name = "System BIOS Callbacks";
+		break;
+
+	default:
+		name = "Unknown";
+		break;
+	}
+	return name;
+}
+
+static const char *opregion_subfunc_name(u32 subfunc)
+{
+	const char *name = NULL;
+
+	switch (subfunc) {
+	case 0:
+		name = "Supported Calls";
+		break;
+
+	case 1:
+		name = "Requested Callbacks";
+		break;
+
+	case 2 ... 3:
+	case 8 ... 9:
+		name = "Reserved";
+		break;
+
+	case 5:
+		name = "Boot Display";
+		break;
+
+	case 6:
+		name = "TV-Standard/Video-Connector";
+		break;
+
+	case 7:
+		name = "Internal Graphics";
+		break;
+
+	case 10:
+		name = "Spread Spectrum Clocks";
+		break;
+
+	case 11:
+		name = "Get AKSV";
+		break;
+
+	default:
+		name = "Unknown";
+		break;
+	}
+	return name;
+};
+
+static bool querying_capabilities(u32 scic)
+{
+	u32 func, subfunc;
+
+	func = GVT_OPREGION_FUNC(scic);
+	subfunc = GVT_OPREGION_SUBFUNC(scic);
+
+	if ((func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA &&
+		subfunc == INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS)
+		|| (func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA &&
+		 subfunc == INTEL_GVT_OPREGION_SCIC_SF_REQEUSTEDCALLBACKS)
+		|| (func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSCALLBACKS &&
+		 subfunc == INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS)) {
+		return true;
+	}
+	return false;
+}
+
+/**
+ * intel_vgpu_emulate_opregion_request - emulating OpRegion request
+ * @vgpu: a vGPU
+ * @swsci: SWSCI request
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
+{
+	u32 *scic, *parm;
+	u32 func, subfunc;
+
+	scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC;
+	parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM;
+
+	if (!(swsci & SWSCI_SCI_SELECT)) {
+		gvt_err("vgpu%d: requesting SMI service\n", vgpu->id);
+		return 0;
+	}
+	/* ignore non 0->1 transitions */
+	if ((vgpu_cfg_space(vgpu)[INTEL_GVT_PCI_SWSCI]
+				& SWSCI_SCI_TRIGGER) ||
+			!(swsci & SWSCI_SCI_TRIGGER)) {
+		return 0;
+	}
+
+	func = GVT_OPREGION_FUNC(*scic);
+	subfunc = GVT_OPREGION_SUBFUNC(*scic);
+	if (!querying_capabilities(*scic)) {
+		gvt_err("vgpu%d: requesting runtime service: func \"%s\","
+				" subfunc \"%s\"\n",
+				vgpu->id,
+				opregion_func_name(func),
+				opregion_subfunc_name(subfunc));
+		/*
+		 * emulate exit status of function call, '0' means
+		 * "failure, generic, unsupported or unknown cause"
+		 */
+		*scic &= ~OPREGION_SCIC_EXIT_MASK;
+		return 0;
+	}
+
+	*scic = 0;
+	*parm = 0;
+	return 0;
+}
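
The SWSCI emulation above only answers capability queries; everything else is logged and
failed through the exit-status field in SCIC. The func/subfunc decoding it relies on can
be sketched stand-alone as follows, with the masks and shifts copied from the
OPREGION_SCIC_* definitions in reg.h and a made-up sample value:

#include <stdint.h>
#include <stdio.h>

/* Same field layout as OPREGION_SCIC_* in reg.h. */
#define SCIC_FUNC_MASK		0x1E
#define SCIC_FUNC_SHIFT		1
#define SCIC_SUBFUNC_MASK	0xFF00
#define SCIC_SUBFUNC_SHIFT	8

int main(void)
{
	/* Sample SCIC: func 4 (Get BIOS Data), subfunc 0 (Supported Calls),
	 * one of the capability queries answered with an all-zero SCIC/PARM. */
	uint32_t scic = (4u << SCIC_FUNC_SHIFT) | (0u << SCIC_SUBFUNC_SHIFT);

	uint32_t func	 = (scic & SCIC_FUNC_MASK) >> SCIC_FUNC_SHIFT;
	uint32_t subfunc = (scic & SCIC_SUBFUNC_MASK) >> SCIC_SUBFUNC_SHIFT;

	printf("func=%u subfunc=%u\n", func, subfunc);
	return 0;
}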

+ 23 - 0
drivers/gpu/drm/i915/gvt/reg.h

@@ -30,4 +30,27 @@
 #define   BDW_GMCH_GMS_SHIFT		8
 #define   BDW_GMCH_GMS_MASK		0xff
 
+#define INTEL_GVT_PCI_SWSCI		0xe8
+#define   SWSCI_SCI_SELECT		(1 << 15)
+#define   SWSCI_SCI_TRIGGER		1
+
+#define INTEL_GVT_PCI_OPREGION		0xfc
+
+#define INTEL_GVT_OPREGION_CLID		0x1AC
+#define INTEL_GVT_OPREGION_SCIC		0x200
+#define   OPREGION_SCIC_FUNC_MASK	0x1E
+#define   OPREGION_SCIC_FUNC_SHIFT	1
+#define   OPREGION_SCIC_SUBFUNC_MASK	0xFF00
+#define   OPREGION_SCIC_SUBFUNC_SHIFT	8
+#define   OPREGION_SCIC_EXIT_MASK	0xE0
+#define INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA         4
+#define INTEL_GVT_OPREGION_SCIC_F_GETBIOSCALLBACKS    6
+#define INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS      0
+#define INTEL_GVT_OPREGION_SCIC_SF_REQEUSTEDCALLBACKS 1
+#define INTEL_GVT_OPREGION_PARM                   0x204
+
+#define INTEL_GVT_OPREGION_PAGES	2
+#define INTEL_GVT_OPREGION_PORDER	1
+#define INTEL_GVT_OPREGION_SIZE		(2 * 4096)
+
 #endif
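
The three OpRegion sizing constants are tied together: the allocation order handed to
__get_free_pages() must yield exactly the pages that back INTEL_GVT_OPREGION_SIZE. A tiny
stand-alone check of that relationship, with the constant names shortened for the sketch:

#include <assert.h>
#include <stdio.h>

/* Same values as the INTEL_GVT_OPREGION_* constants in reg.h. */
#define OPREGION_PAGES	2
#define OPREGION_PORDER	1		/* page order used by __get_free_pages() */
#define OPREGION_SIZE	(2 * 4096)

int main(void)
{
	/* The allocation order and the page/byte counts must stay in sync. */
	assert((1 << OPREGION_PORDER) == OPREGION_PAGES);
	assert(OPREGION_PAGES * 4096 == OPREGION_SIZE);
	printf("opregion: %d pages (order %d), %d bytes\n",
	       OPREGION_PAGES, OPREGION_PORDER, OPREGION_SIZE);
	return 0;
}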

+ 9 - 0
drivers/gpu/drm/i915/gvt/vgpu.c

@@ -141,6 +141,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	vgpu->active = false;
 	idr_remove(&gvt->vgpu_idr, vgpu->id);
 
+	intel_vgpu_clean_opregion(vgpu);
 	intel_vgpu_clean_gtt(vgpu);
 	intel_gvt_hypervisor_detach_vgpu(vgpu);
 	intel_vgpu_free_resource(vgpu);
@@ -204,11 +205,19 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_detach_hypervisor_vgpu;
 
+	if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) {
+		ret = intel_vgpu_init_opregion(vgpu, 0);
+		if (ret)
+			goto out_clean_gtt;
+	}
+
 	vgpu->active = true;
 	mutex_unlock(&gvt->lock);
 
 	return vgpu;
 
+out_clean_gtt:
+	intel_vgpu_clean_gtt(vgpu);
 out_detach_hypervisor_vgpu:
 	intel_gvt_hypervisor_detach_vgpu(vgpu);
 out_clean_vgpu_resource: