浏览代码

PCI: Add sysfs sriov_drivers_autoprobe to control VF driver binding

Sometimes it is not desirable to bind SR-IOV VFs to drivers.  Leaving them
unbound saves the host-side resources that VF instances destined for VMs
would otherwise consume.

Add a new PCI sysfs interface "sriov_drivers_autoprobe" to control that
from the PF.  To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable
probe) to:

  /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe

Note that this must be done before enabling VFs.  The change will not take
effect if VFs are already enabled.  In that case, disable the VFs by setting
sriov_numvfs to 0, choose whether to probe or not, and then re-enable the
VFs by restoring sriov_numvfs.

[bhelgaas: changelog, ABI doc]
Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Bodong Wang 8 年之前
父节点
当前提交
0e7df22401

+ 22 - 0
Documentation/ABI/testing/sysfs-bus-pci

@@ -301,3 +301,25 @@ Contact:	Emil Velikov <emil.l.velikov@gmail.com>
 Description:
 Description:
 		This file contains the revision field of the PCI device.
 		This file contains the revision field of the PCI device.
 		The value comes from device config space. The file is read only.
 		The value comes from device config space. The file is read only.
+
+What:		/sys/bus/pci/devices/.../sriov_drivers_autoprobe
+Date:		April 2017
+Contact:	Bodong Wang <bodong@mellanox.com>
+Description:
+		This file is associated with the PF of a device that
+		supports SR-IOV.  It determines whether newly-enabled VFs
+		are immediately bound to a driver.  It initially contains
+		1, which means the kernel automatically binds VFs to a
+		compatible driver immediately after they are enabled.  If
+		an application writes 0 to the file before enabling VFs,
+		the kernel will not bind VFs to a driver.
+
+		A typical use case is to write 0 to this file, then enable
+		VFs, then assign the newly-created VFs to virtual machines.
+		Note that changing this file does not affect already-
+		enabled VFs.  If VFs are already enabled, the user must
+		first disable them, write 0 to sriov_drivers_autoprobe,
+		then re-enable the VFs.
+
+		This is similar to /sys/bus/pci/drivers_autoprobe, but
+		affects only the VFs associated with a specific PF.

+ 12 - 0
Documentation/PCI/pci-iov-howto.txt

@@ -68,6 +68,18 @@ To disable SR-IOV capability:
 	echo  0 > \
 	echo  0 > \
         /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
         /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
 
 
+To enable auto probing VFs by a compatible driver on the host, run the
+command below before enabling SR-IOV capabilities. This is the
+default behavior.
+	echo 1 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
+To disable auto probing VFs by a compatible driver on the host, run the
+command below before enabling SR-IOV capabilities. Updating this
+entry will not affect VFs which are already probed.
+	echo  0 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_drivers_autoprobe
+
 3.2 Usage example
 3.2 Usage example
 
 
 Following piece of code illustrates the usage of the SR-IOV API.
 Following piece of code illustrates the usage of the SR-IOV API.

+ 1 - 0
drivers/pci/iov.c

@@ -450,6 +450,7 @@ found:
 	iov->total_VFs = total;
 	iov->total_VFs = total;
 	iov->pgsz = pgsz;
 	iov->pgsz = pgsz;
 	iov->self = dev;
 	iov->self = dev;
+	iov->drivers_autoprobe = true;
 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)

+ 18 - 4
drivers/pci/pci-driver.c

@@ -394,6 +394,18 @@ void __weak pcibios_free_irq(struct pci_dev *dev)
 {
 {
 }
 }
 
 
+#ifdef CONFIG_PCI_IOV
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe);
+}
+#else
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return true;
+}
+#endif
+
 static int pci_device_probe(struct device *dev)
 static int pci_device_probe(struct device *dev)
 {
 {
 	int error;
 	int error;
@@ -405,10 +417,12 @@ static int pci_device_probe(struct device *dev)
 		return error;
 		return error;
 
 
 	pci_dev_get(pci_dev);
 	pci_dev_get(pci_dev);
-	error = __pci_device_probe(drv, pci_dev);
-	if (error) {
-		pcibios_free_irq(pci_dev);
-		pci_dev_put(pci_dev);
+	if (pci_device_can_probe(pci_dev)) {
+		error = __pci_device_probe(drv, pci_dev);
+		if (error) {
+			pcibios_free_irq(pci_dev);
+			pci_dev_put(pci_dev);
+		}
 	}
 	}
 
 
 	return error;
 	return error;

+ 28 - 0
drivers/pci/pci-sysfs.c

@@ -526,10 +526,37 @@ exit:
 	return count;
 	return count;
 }
 }
 
 
+static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
+}
+
+static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool drivers_autoprobe;
+
+	if (kstrtobool(buf, &drivers_autoprobe) < 0)
+		return -EINVAL;
+
+	pdev->sriov->drivers_autoprobe = drivers_autoprobe;
+
+	return count;
+}
+
 static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
 static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
 static struct device_attribute sriov_numvfs_attr =
 static struct device_attribute sriov_numvfs_attr =
 		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
 		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
 		       sriov_numvfs_show, sriov_numvfs_store);
 		       sriov_numvfs_show, sriov_numvfs_store);
+static struct device_attribute sriov_drivers_autoprobe_attr =
+		__ATTR(sriov_drivers_autoprobe, (S_IRUGO|S_IWUSR|S_IWGRP),
+		       sriov_drivers_autoprobe_show, sriov_drivers_autoprobe_store);
 #endif /* CONFIG_PCI_IOV */
 #endif /* CONFIG_PCI_IOV */
 
 
 static ssize_t driver_override_store(struct device *dev,
 static ssize_t driver_override_store(struct device *dev,
@@ -1549,6 +1576,7 @@ static struct attribute_group pci_dev_hp_attr_group = {
 static struct attribute *sriov_dev_attrs[] = {
 static struct attribute *sriov_dev_attrs[] = {
 	&sriov_totalvfs_attr.attr,
 	&sriov_totalvfs_attr.attr,
 	&sriov_numvfs_attr.attr,
 	&sriov_numvfs_attr.attr,
+	&sriov_drivers_autoprobe_attr.attr,
 	NULL,
 	NULL,
 };
 };
 
 

+ 1 - 0
drivers/pci/pci.h

@@ -272,6 +272,7 @@ struct pci_sriov {
 	struct pci_dev *self;	/* this PF */
 	struct pci_dev *self;	/* this PF */
 	struct mutex lock;	/* lock for setting sriov_numvfs in sysfs */
 	struct mutex lock;	/* lock for setting sriov_numvfs in sysfs */
 	resource_size_t barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
 	resource_size_t barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
+	bool drivers_autoprobe;	/* auto probing of VFs by driver */
 };
 };
 
 
 #ifdef CONFIG_PCI_ATS
 #ifdef CONFIG_PCI_ATS