Selaa lähdekoodia

Merge tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - EDID interfaces for vfio devices supporting display extensions (Gerd
   Hoffmann)

 - Generically select Type-1 IOMMU model support on ARM/ARM64 (Geert
   Uytterhoeven)

 - Quirk for VFs reporting INTx pin (Alex Williamson)

 - Fix error path memory leak in MSI support (Li Qiang)

* tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio:
  vfio: add edid support to mbochs sample driver
  vfio: add edid api for display (vgpu) devices.
  drivers/vfio: Allow type-1 IOMMU instantiation with all ARM/ARM64 IOMMUs
  vfio/pci: Mask buggy SR-IOV VF INTx support
  vfio/pci: Fix potential memory leak in vfio_msi_cap_len
Linus Torvalds 6 vuotta sitten
vanhempi
commit
0c86e761b9

+ 1 - 1
drivers/vfio/Kconfig

@@ -21,7 +21,7 @@ config VFIO_VIRQFD
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	depends on IOMMU_API
-	select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
+	select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
 	select ANON_INODES
 	help
 	  VFIO provides a framework for secure userspace device drivers.

+ 6 - 2
drivers/vfio/pci/vfio_pci.c

@@ -434,10 +434,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 {
 	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
 		u8 pin;
+
+		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
+		    vdev->nointx || vdev->pdev->is_virtfn)
+			return 0;
+
 		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin)
-			return 1;
 
+		return pin ? 1 : 0;
 	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
 		u8 pos;
 		u16 flags;

+ 30 - 1
drivers/vfio/pci/vfio_pci_config.c

@@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos)
 		return -ENOMEM;
 
 	ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags);
-	if (ret)
+	if (ret) {
+		kfree(vdev->msi_perm);
 		return ret;
+	}
 
 	return len;
 }
@@ -1609,6 +1611,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
 	return 0;
 }
 
+/*
+ * Nag about hardware bugs, hopefully to have vendors fix them, but at least
+ * to collect a list of dependencies for the VF INTx pin quirk below.
+ */
+static const struct pci_device_id known_bogus_vf_intx_pin[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) },
+	{}
+};
+
 /*
  * For each device we allocate a pci_config_map that indicates the
  * capability occupying each dword and thus the struct perm_bits we
@@ -1674,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev)
 	if (pdev->is_virtfn) {
 		*(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor);
 		*(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
+
+		/*
+		 * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register
+		 * does not apply to VFs and VFs must implement this register
+		 * as read-only with value zero.  Userspace is not readily able
+		 * to identify whether a device is a VF and thus that the pin
+		 * definition on the device is bogus should it violate this
+		 * requirement.  We already virtualize the pin register for
+		 * other purposes, so we simply need to replace the bogus value
+		 * and consider VFs when we determine INTx IRQ count.
+		 */
+		if (vconfig[PCI_INTERRUPT_PIN] &&
+		    !pci_match_id(known_bogus_vf_intx_pin, pdev))
+			pci_warn(pdev,
+				 "Hardware bug: VF reports bogus INTx pin %d\n",
+				 vconfig[PCI_INTERRUPT_PIN]);
+
+		vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */
 	}
 
 	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)

+ 50 - 0
include/uapi/linux/vfio.h

@@ -303,6 +303,56 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
 
+#define VFIO_REGION_TYPE_GFX                    (1)
+#define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
+
+/**
+ * struct vfio_region_gfx_edid - EDID region layout.
+ *
+ * Set display link state and EDID blob.
+ *
+ * The EDID blob has monitor information such as brand, name, serial
+ * number, physical size, supported video modes and more.
+ *
+ * This special region allows userspace (typically qemu) set a virtual
+ * EDID for the virtual monitor, which allows a flexible display
+ * configuration.
+ *
+ * For the edid blob spec look here:
+ *    https://en.wikipedia.org/wiki/Extended_Display_Identification_Data
+ *
+ * On linux systems you can find the EDID blob in sysfs:
+ *    /sys/class/drm/${card}/${connector}/edid
+ *
+ * You can use the edid-decode ulility (comes with xorg-x11-utils) to
+ * decode the EDID blob.
+ *
+ * @edid_offset: location of the edid blob, relative to the
+ *               start of the region (readonly).
+ * @edid_max_size: max size of the edid blob (readonly).
+ * @edid_size: actual edid size (read/write).
+ * @link_state: display link state (read/write).
+ * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on.
+ * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off.
+ * @max_xres: max display width (0 == no limitation, readonly).
+ * @max_yres: max display height (0 == no limitation, readonly).
+ *
+ * EDID update protocol:
+ *   (1) set link-state to down.
+ *   (2) update edid blob and size.
+ *   (3) set link-state to up.
+ */
+struct vfio_region_gfx_edid {
+	__u32 edid_offset;
+	__u32 edid_max_size;
+	__u32 edid_size;
+	__u32 max_xres;
+	__u32 max_yres;
+	__u32 link_state;
+#define VFIO_DEVICE_GFX_LINK_STATE_UP    1
+#define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
+};
+
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within

+ 117 - 19
samples/vfio-mdev/mbochs.c

@@ -71,11 +71,19 @@
 #define MBOCHS_NAME		  "mbochs"
 #define MBOCHS_CLASS_NAME	  "mbochs"
 
+#define MBOCHS_EDID_REGION_INDEX  VFIO_PCI_NUM_REGIONS
+#define MBOCHS_NUM_REGIONS        (MBOCHS_EDID_REGION_INDEX+1)
+
 #define MBOCHS_CONFIG_SPACE_SIZE  0xff
 #define MBOCHS_MMIO_BAR_OFFSET	  PAGE_SIZE
 #define MBOCHS_MMIO_BAR_SIZE	  PAGE_SIZE
-#define MBOCHS_MEMORY_BAR_OFFSET  (MBOCHS_MMIO_BAR_OFFSET + \
+#define MBOCHS_EDID_OFFSET	  (MBOCHS_MMIO_BAR_OFFSET +	\
 				   MBOCHS_MMIO_BAR_SIZE)
+#define MBOCHS_EDID_SIZE	  PAGE_SIZE
+#define MBOCHS_MEMORY_BAR_OFFSET  (MBOCHS_EDID_OFFSET + \
+				   MBOCHS_EDID_SIZE)
+
+#define MBOCHS_EDID_BLOB_OFFSET   (MBOCHS_EDID_SIZE/2)
 
 #define STORE_LE16(addr, val)	(*(u16 *)addr = val)
 #define STORE_LE32(addr, val)	(*(u32 *)addr = val)
@@ -95,16 +103,24 @@ MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices");
 static const struct mbochs_type {
 	const char *name;
 	u32 mbytes;
+	u32 max_x;
+	u32 max_y;
 } mbochs_types[] = {
 	{
 		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
 		.mbytes = 4,
+		.max_x  = 800,
+		.max_y  = 600,
 	}, {
 		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
 		.mbytes = 16,
+		.max_x  = 1920,
+		.max_y  = 1440,
 	}, {
 		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
 		.mbytes = 64,
+		.max_x  = 0,
+		.max_y  = 0,
 	},
 };
 
@@ -115,6 +131,11 @@ static struct cdev	mbochs_cdev;
 static struct device	mbochs_dev;
 static int		mbochs_used_mbytes;
 
+struct vfio_region_info_ext {
+	struct vfio_region_info          base;
+	struct vfio_region_info_cap_type type;
+};
+
 struct mbochs_mode {
 	u32 drm_format;
 	u32 bytepp;
@@ -144,13 +165,14 @@ struct mdev_state {
 	u32 memory_bar_mask;
 	struct mutex ops_lock;
 	struct mdev_device *mdev;
-	struct vfio_device_info dev_info;
 
 	const struct mbochs_type *type;
 	u16 vbe[VBE_DISPI_INDEX_COUNT];
 	u64 memsize;
 	struct page **pages;
 	pgoff_t pagecount;
+	struct vfio_region_gfx_edid edid_regs;
+	u8 edid_blob[0x400];
 
 	struct list_head dmabufs;
 	u32 active_id;
@@ -342,10 +364,20 @@ static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset,
 			     char *buf, u32 count)
 {
 	struct device *dev = mdev_dev(mdev_state->mdev);
+	struct vfio_region_gfx_edid *edid;
 	u16 reg16 = 0;
 	int index;
 
 	switch (offset) {
+	case 0x000 ... 0x3ff: /* edid block */
+		edid = &mdev_state->edid_regs;
+		if (edid->link_state != VFIO_DEVICE_GFX_LINK_STATE_UP ||
+		    offset >= edid->edid_size) {
+			memset(buf, 0, count);
+			break;
+		}
+		memcpy(buf, mdev_state->edid_blob + offset, count);
+		break;
 	case 0x500 ... 0x515: /* bochs dispi interface */
 		if (count != 2)
 			goto unhandled;
@@ -365,6 +397,44 @@ unhandled:
 	}
 }
 
+static void handle_edid_regs(struct mdev_state *mdev_state, u16 offset,
+			     char *buf, u32 count, bool is_write)
+{
+	char *regs = (void *)&mdev_state->edid_regs;
+
+	if (offset + count > sizeof(mdev_state->edid_regs))
+		return;
+	if (count != 4)
+		return;
+	if (offset % 4)
+		return;
+
+	if (is_write) {
+		switch (offset) {
+		case offsetof(struct vfio_region_gfx_edid, link_state):
+		case offsetof(struct vfio_region_gfx_edid, edid_size):
+			memcpy(regs + offset, buf, count);
+			break;
+		default:
+			/* read-only regs */
+			break;
+		}
+	} else {
+		memcpy(buf, regs + offset, count);
+	}
+}
+
+static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset,
+			     char *buf, u32 count, bool is_write)
+{
+	if (offset + count > mdev_state->edid_regs.edid_max_size)
+		return;
+	if (is_write)
+		memcpy(mdev_state->edid_blob + offset, buf, count);
+	else
+		memcpy(buf, mdev_state->edid_blob + offset, count);
+}
+
 static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
 			   loff_t pos, bool is_write)
 {
@@ -384,13 +454,25 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
 			memcpy(buf, (mdev_state->vconfig + pos), count);
 
 	} else if (pos >= MBOCHS_MMIO_BAR_OFFSET &&
-		   pos + count <= MBOCHS_MEMORY_BAR_OFFSET) {
+		   pos + count <= (MBOCHS_MMIO_BAR_OFFSET +
+				   MBOCHS_MMIO_BAR_SIZE)) {
 		pos -= MBOCHS_MMIO_BAR_OFFSET;
 		if (is_write)
 			handle_mmio_write(mdev_state, pos, buf, count);
 		else
 			handle_mmio_read(mdev_state, pos, buf, count);
 
+	} else if (pos >= MBOCHS_EDID_OFFSET &&
+		   pos + count <= (MBOCHS_EDID_OFFSET +
+				   MBOCHS_EDID_SIZE)) {
+		pos -= MBOCHS_EDID_OFFSET;
+		if (pos < MBOCHS_EDID_BLOB_OFFSET) {
+			handle_edid_regs(mdev_state, pos, buf, count, is_write);
+		} else {
+			pos -= MBOCHS_EDID_BLOB_OFFSET;
+			handle_edid_blob(mdev_state, pos, buf, count, is_write);
+		}
+
 	} else if (pos >= MBOCHS_MEMORY_BAR_OFFSET &&
 		   pos + count <=
 		   MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
@@ -471,6 +553,10 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
 	mdev_state->next_id = 1;
 
 	mdev_state->type = type;
+	mdev_state->edid_regs.max_xres = type->max_x;
+	mdev_state->edid_regs.max_yres = type->max_y;
+	mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET;
+	mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob);
 	mbochs_create_config_space(mdev_state);
 	mbochs_reset(mdev);
 
@@ -932,16 +1018,16 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
 }
 
 static int mbochs_get_region_info(struct mdev_device *mdev,
-				  struct vfio_region_info *region_info,
-				  u16 *cap_type_id, void **cap_type)
+				  struct vfio_region_info_ext *ext)
 {
+	struct vfio_region_info *region_info = &ext->base;
 	struct mdev_state *mdev_state;
 
 	mdev_state = mdev_get_drvdata(mdev);
 	if (!mdev_state)
 		return -EINVAL;
 
-	if (region_info->index >= VFIO_PCI_NUM_REGIONS)
+	if (region_info->index >= MBOCHS_NUM_REGIONS)
 		return -EINVAL;
 
 	switch (region_info->index) {
@@ -964,6 +1050,20 @@ static int mbochs_get_region_info(struct mdev_device *mdev,
 		region_info->flags  = (VFIO_REGION_INFO_FLAG_READ  |
 				       VFIO_REGION_INFO_FLAG_WRITE);
 		break;
+	case MBOCHS_EDID_REGION_INDEX:
+		ext->base.argsz = sizeof(*ext);
+		ext->base.offset = MBOCHS_EDID_OFFSET;
+		ext->base.size = MBOCHS_EDID_SIZE;
+		ext->base.flags = (VFIO_REGION_INFO_FLAG_READ  |
+				   VFIO_REGION_INFO_FLAG_WRITE |
+				   VFIO_REGION_INFO_FLAG_CAPS);
+		ext->base.cap_offset = offsetof(typeof(*ext), type);
+		ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE;
+		ext->type.header.version = 1;
+		ext->type.header.next = 0;
+		ext->type.type = VFIO_REGION_TYPE_GFX;
+		ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID;
+		break;
 	default:
 		region_info->size   = 0;
 		region_info->offset = 0;
@@ -984,7 +1084,7 @@ static int mbochs_get_device_info(struct mdev_device *mdev,
 				  struct vfio_device_info *dev_info)
 {
 	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
-	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+	dev_info->num_regions = MBOCHS_NUM_REGIONS;
 	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
 	return 0;
 }
@@ -1084,7 +1184,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			unsigned long arg)
 {
 	int ret = 0;
-	unsigned long minsz;
+	unsigned long minsz, outsz;
 	struct mdev_state *mdev_state;
 
 	mdev_state = mdev_get_drvdata(mdev);
@@ -1106,8 +1206,6 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		if (ret)
 			return ret;
 
-		memcpy(&mdev_state->dev_info, &info, sizeof(info));
-
 		if (copy_to_user((void __user *)arg, &info, minsz))
 			return -EFAULT;
 
@@ -1115,24 +1213,24 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
 	}
 	case VFIO_DEVICE_GET_REGION_INFO:
 	{
-		struct vfio_region_info info;
-		u16 cap_type_id = 0;
-		void *cap_type = NULL;
+		struct vfio_region_info_ext info;
 
-		minsz = offsetofend(struct vfio_region_info, offset);
+		minsz = offsetofend(typeof(info), base.offset);
 
 		if (copy_from_user(&info, (void __user *)arg, minsz))
 			return -EFAULT;
 
-		if (info.argsz < minsz)
+		outsz = info.base.argsz;
+		if (outsz < minsz)
+			return -EINVAL;
+		if (outsz > sizeof(info))
 			return -EINVAL;
 
-		ret = mbochs_get_region_info(mdev, &info, &cap_type_id,
-					   &cap_type);
+		ret = mbochs_get_region_info(mdev, &info);
 		if (ret)
 			return ret;
 
-		if (copy_to_user((void __user *)arg, &info, minsz))
+		if (copy_to_user((void __user *)arg, &info, outsz))
 			return -EFAULT;
 
 		return 0;
@@ -1148,7 +1246,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			return -EFAULT;
 
 		if ((info.argsz < minsz) ||
-		    (info.index >= mdev_state->dev_info.num_irqs))
+		    (info.index >= VFIO_PCI_NUM_IRQS))
 			return -EINVAL;
 
 		ret = mbochs_get_irq_info(mdev, &info);