Browse Source

Merge branch 'lorenzo/pci/hv'

* lorenzo/pci/hv:
  PCI: hv: Only queue new work items in hv_pci_devices_present() if necessary
  PCI: hv: Remove the bogus test in hv_eject_device_work()
  PCI: hv: Fix a comment typo in _hv_pcifront_read_config()
  PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()
  PCI: hv: Serialize the present and eject work items
Bjorn Helgaas 7 years ago
parent
commit
84d4d6f882
1 changed file with 87 additions and 25 deletions
  1. 87 25
      drivers/pci/host/pci-hyperv.c

+ 87 - 25
drivers/pci/host/pci-hyperv.c

@@ -447,7 +447,6 @@ struct hv_pcibus_device {
 	spinlock_t device_list_lock;	/* Protect lists below */
 	spinlock_t device_list_lock;	/* Protect lists below */
 	void __iomem *cfg_addr;
 	void __iomem *cfg_addr;
 
 
-	struct semaphore enum_sem;
 	struct list_head resources_for_children;
 	struct list_head resources_for_children;
 
 
 	struct list_head children;
 	struct list_head children;
@@ -461,6 +460,8 @@ struct hv_pcibus_device {
 	struct retarget_msi_interrupt retarget_msi_interrupt_params;
 	struct retarget_msi_interrupt retarget_msi_interrupt_params;
 
 
 	spinlock_t retarget_msi_interrupt_lock;
 	spinlock_t retarget_msi_interrupt_lock;
+
+	struct workqueue_struct *wq;
 };
 };
 
 
 /*
 /*
@@ -520,6 +521,8 @@ struct hv_pci_compl {
 	s32 completion_status;
 	s32 completion_status;
 };
 };
 
 
+static void hv_pci_onchannelcallback(void *context);
+
 /**
 /**
  * hv_pci_generic_compl() - Invoked for a completion packet
  * hv_pci_generic_compl() - Invoked for a completion packet
  * @context:		Set up by the sender of the packet.
  * @context:		Set up by the sender of the packet.
@@ -653,7 +656,7 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
 			break;
 			break;
 		}
 		}
 		/*
 		/*
-		 * Make sure the write was done before we release the spinlock
+		 * Make sure the read was done before we release the spinlock
 		 * allowing consecutive reads/writes.
 		 * allowing consecutive reads/writes.
 		 */
 		 */
 		mb();
 		mb();
@@ -664,6 +667,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
 	}
 	}
 }
 }
 
 
+static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
+{
+	u16 ret;
+	unsigned long flags;
+	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
+			     PCI_VENDOR_ID;
+
+	spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
+
+	/*
+	 * Choose the function to be read by writing its slot number
+	 * (hpdev->desc.win_slot.slot) to hbus->cfg_addr. This is done
+	 * with config_lock held and interrupts disabled, so the select
+	 * and the read below are not interleaved with other users of
+	 * that lock. (See comment above)
+	 */
+	writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
+	/* Make sure the function was chosen before we start reading. */
+	mb();
+	/*
+	 * Read from that function's config space: a single 16-bit read
+	 * at cfg_addr + CFG_PAGE_OFFSET + PCI_VENDOR_ID, which is the
+	 * value returned to the caller.
+	 */
+	ret = readw(addr);
+	/*
+	 * mb() is not required here, because the spin_unlock_irqrestore()
+	 * is a barrier.
+	 */
+
+	spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
+
+	return ret;
+}
+
 /**
 /**
  * _hv_pcifront_write_config() - Internal PCI config write
  * _hv_pcifront_write_config() - Internal PCI config write
  * @hpdev:	The PCI driver's representation of the device
  * @hpdev:	The PCI driver's representation of the device
@@ -1106,8 +1134,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	 * Since this function is called with IRQ locks held, can't
 	 * Since this function is called with IRQ locks held, can't
 	 * do normal wait for completion; instead poll.
 	 * do normal wait for completion; instead poll.
 	 */
 	 */
-	while (!try_wait_for_completion(&comp.comp_pkt.host_event))
+	while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
+		/* 0xFFFF means an invalid PCI VENDOR ID. */
+		if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
+			dev_err_once(&hbus->hdev->device,
+				     "the device has gone\n");
+			goto free_int_desc;
+		}
+
+		/*
+		 * When the higher level interrupt code calls us with
+		 * interrupt disabled, we must poll the channel by calling
+		 * the channel callback directly when channel->target_cpu is
+		 * the current CPU. When the higher level interrupt code
+		 * calls us with interrupt enabled, let's add the
+		 * local_bh_disable()/enable() to avoid race.
+		 */
+		local_bh_disable();
+
+		if (hbus->hdev->channel->target_cpu == smp_processor_id())
+			hv_pci_onchannelcallback(hbus);
+
+		local_bh_enable();
+
+		if (hpdev->state == hv_pcichild_ejecting) {
+			dev_err_once(&hbus->hdev->device,
+				     "the device is being ejected\n");
+			goto free_int_desc;
+		}
+
 		udelay(100);
 		udelay(100);
+	}
 
 
 	if (comp.comp_pkt.completion_status < 0) {
 	if (comp.comp_pkt.completion_status < 0) {
 		dev_err(&hbus->hdev->device,
 		dev_err(&hbus->hdev->device,
@@ -1590,12 +1647,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
  * It must also treat the omission of a previously observed device as
  * It must also treat the omission of a previously observed device as
  * notification that the device no longer exists.
  * notification that the device no longer exists.
  *
  *
- * Note that this function is a work item, and it may not be
- * invoked in the order that it was queued.  Back to back
- * updates of the list of present devices may involve queuing
- * multiple work items, and this one may run before ones that
- * were sent later. As such, this function only does something
- * if is the last one in the queue.
+ * Note that this function is serialized with hv_eject_device_work(),
+ * because both are pushed to the ordered workqueue hbus->wq.
  */
  */
 static void pci_devices_present_work(struct work_struct *work)
 static void pci_devices_present_work(struct work_struct *work)
 {
 {
@@ -1616,11 +1669,6 @@ static void pci_devices_present_work(struct work_struct *work)
 
 
 	INIT_LIST_HEAD(&removed);
 	INIT_LIST_HEAD(&removed);
 
 
-	if (down_interruptible(&hbus->enum_sem)) {
-		put_hvpcibus(hbus);
-		return;
-	}
-
 	/* Pull this off the queue and process it if it was the last one. */
 	/* Pull this off the queue and process it if it was the last one. */
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 	while (!list_empty(&hbus->dr_list)) {
 	while (!list_empty(&hbus->dr_list)) {
@@ -1637,7 +1685,6 @@ static void pci_devices_present_work(struct work_struct *work)
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 
 
 	if (!dr) {
 	if (!dr) {
-		up(&hbus->enum_sem);
 		put_hvpcibus(hbus);
 		put_hvpcibus(hbus);
 		return;
 		return;
 	}
 	}
@@ -1724,7 +1771,6 @@ static void pci_devices_present_work(struct work_struct *work)
 		break;
 		break;
 	}
 	}
 
 
-	up(&hbus->enum_sem);
 	put_hvpcibus(hbus);
 	put_hvpcibus(hbus);
 	kfree(dr);
 	kfree(dr);
 }
 }
@@ -1743,6 +1789,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 	struct hv_dr_state *dr;
 	struct hv_dr_state *dr;
 	struct hv_dr_work *dr_wrk;
 	struct hv_dr_work *dr_wrk;
 	unsigned long flags;
 	unsigned long flags;
+	bool pending_dr;
 
 
 	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
 	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
 	if (!dr_wrk)
 	if (!dr_wrk)
@@ -1766,11 +1813,21 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 	}
 	}
 
 
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	/*
+	 * If pending_dr is true, we have already queued a work,
+	 * which will see the new dr. Otherwise, we need to
+	 * queue a new work.
+	 */
+	pending_dr = !list_empty(&hbus->dr_list);
 	list_add_tail(&dr->list_entry, &hbus->dr_list);
 	list_add_tail(&dr->list_entry, &hbus->dr_list);
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 
 
-	get_hvpcibus(hbus);
-	schedule_work(&dr_wrk->wrk);
+	if (pending_dr) {
+		kfree(dr_wrk);
+	} else {
+		get_hvpcibus(hbus);
+		queue_work(hbus->wq, &dr_wrk->wrk);
+	}
 }
 }
 
 
 /**
 /**
@@ -1796,10 +1853,7 @@ static void hv_eject_device_work(struct work_struct *work)
 
 
 	hpdev = container_of(work, struct hv_pci_dev, wrk);
 	hpdev = container_of(work, struct hv_pci_dev, wrk);
 
 
-	if (hpdev->state != hv_pcichild_ejecting) {
-		put_pcichild(hpdev, hv_pcidev_ref_pnp);
-		return;
-	}
+	WARN_ON(hpdev->state != hv_pcichild_ejecting);
 
 
 	/*
 	/*
 	 * Ejection can come before or after the PCI bus has been set up, so
 	 * Ejection can come before or after the PCI bus has been set up, so
@@ -1848,7 +1902,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
 	get_pcichild(hpdev, hv_pcidev_ref_pnp);
 	get_pcichild(hpdev, hv_pcidev_ref_pnp);
 	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
 	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
 	get_hvpcibus(hpdev->hbus);
 	get_hvpcibus(hpdev->hbus);
-	schedule_work(&hpdev->wrk);
+	queue_work(hpdev->hbus->wq, &hpdev->wrk);
 }
 }
 
 
 /**
 /**
@@ -2461,13 +2515,18 @@ static int hv_pci_probe(struct hv_device *hdev,
 	spin_lock_init(&hbus->config_lock);
 	spin_lock_init(&hbus->config_lock);
 	spin_lock_init(&hbus->device_list_lock);
 	spin_lock_init(&hbus->device_list_lock);
 	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
 	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
-	sema_init(&hbus->enum_sem, 1);
 	init_completion(&hbus->remove_event);
 	init_completion(&hbus->remove_event);
+	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
+					   hbus->sysdata.domain);
+	if (!hbus->wq) {
+		ret = -ENOMEM;
+		goto free_bus;
+	}
 
 
 	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
 	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
 			 hv_pci_onchannelcallback, hbus);
 			 hv_pci_onchannelcallback, hbus);
 	if (ret)
 	if (ret)
-		goto free_bus;
+		goto destroy_wq;
 
 
 	hv_set_drvdata(hdev, hbus);
 	hv_set_drvdata(hdev, hbus);
 
 
@@ -2536,6 +2595,8 @@ free_config:
 	hv_free_config_window(hbus);
 	hv_free_config_window(hbus);
 close:
 close:
 	vmbus_close(hdev->channel);
 	vmbus_close(hdev->channel);
+destroy_wq:
+	destroy_workqueue(hbus->wq);
 free_bus:
 free_bus:
 	free_page((unsigned long)hbus);
 	free_page((unsigned long)hbus);
 	return ret;
 	return ret;
@@ -2615,6 +2676,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 	irq_domain_free_fwnode(hbus->sysdata.fwnode);
 	irq_domain_free_fwnode(hbus->sysdata.fwnode);
 	put_hvpcibus(hbus);
 	put_hvpcibus(hbus);
 	wait_for_completion(&hbus->remove_event);
 	wait_for_completion(&hbus->remove_event);
+	destroy_workqueue(hbus->wq);
 	free_page((unsigned long)hbus);
 	free_page((unsigned long)hbus);
 	return 0;
 	return 0;
 }
 }