|
@@ -13,6 +13,7 @@
|
|
|
*/
|
|
|
|
|
|
#include <linux/aer.h>
|
|
|
+#include <linux/async.h>
|
|
|
#include <linux/blkdev.h>
|
|
|
#include <linux/blk-mq.h>
|
|
|
#include <linux/blk-mq-pci.h>
|
|
@@ -68,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
|
|
|
struct nvme_dev;
|
|
|
struct nvme_queue;
|
|
|
|
|
|
-static void nvme_process_cq(struct nvme_queue *nvmeq);
|
|
|
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
|
|
|
|
|
|
/*
|
|
@@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
|
|
|
struct nvme_queue {
|
|
|
struct device *q_dmadev;
|
|
|
struct nvme_dev *dev;
|
|
|
- spinlock_t q_lock;
|
|
|
+ spinlock_t sq_lock;
|
|
|
struct nvme_command *sq_cmds;
|
|
|
struct nvme_command __iomem *sq_cmds_io;
|
|
|
+ spinlock_t cq_lock ____cacheline_aligned_in_smp;
|
|
|
volatile struct nvme_completion *cqes;
|
|
|
struct blk_mq_tags **tags;
|
|
|
dma_addr_t sq_dma_addr;
|
|
@@ -161,7 +162,6 @@ struct nvme_queue {
|
|
|
u16 cq_head;
|
|
|
u16 qid;
|
|
|
u8 cq_phase;
|
|
|
- u8 cqe_seen;
|
|
|
u32 *dbbuf_sq_db;
|
|
|
u32 *dbbuf_cq_db;
|
|
|
u32 *dbbuf_sq_ei;
|
|
@@ -872,6 +872,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
struct nvme_command cmnd;
|
|
|
blk_status_t ret;
|
|
|
|
|
|
+ /*
|
|
|
+ * We should not need to do this, but we're still using this to
|
|
|
+ * ensure we can drain requests on a dying queue.
|
|
|
+ */
|
|
|
+ if (unlikely(nvmeq->cq_vector < 0))
|
|
|
+ return BLK_STS_IOERR;
|
|
|
+
|
|
|
ret = nvme_setup_cmd(ns, req, &cmnd);
|
|
|
if (ret)
|
|
|
return ret;
|
|
@@ -888,15 +895,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
|
|
|
blk_mq_start_request(req);
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
- if (unlikely(nvmeq->cq_vector < 0)) {
|
|
|
- ret = BLK_STS_IOERR;
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
- goto out_cleanup_iod;
|
|
|
- }
|
|
|
+ spin_lock(&nvmeq->sq_lock);
|
|
|
__nvme_submit_cmd(nvmeq, &cmnd);
|
|
|
- nvme_process_cq(nvmeq);
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_unlock(&nvmeq->sq_lock);
|
|
|
return BLK_STS_OK;
|
|
|
out_cleanup_iod:
|
|
|
nvme_free_iod(dev, req);
|
|
@@ -914,10 +915,10 @@ static void nvme_pci_complete_rq(struct request *req)
|
|
|
}
|
|
|
|
|
|
/* We read the CQE phase first to check if the rest of the entry is valid */
/*
 * In this change nvme_cqe_valid(nvmeq, head, phase) becomes
 * nvme_cqe_pending(nvmeq): the entry index and the expected phase are now
 * taken from nvmeq->cq_head and nvmeq->cq_phase rather than from arguments.
 * Returns true when bit 0 of the status field of the entry at cq_head
 * equals cq_phase.
 */
|
|
|
-static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
|
|
|
- u16 phase)
|
|
|
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
|
|
|
{
|
|
|
- return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
|
|
|
+ return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
|
|
|
+ nvmeq->cq_phase;
|
|
|
}
|
|
|
|
|
|
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
|
|
@@ -931,9 +932,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
|
|
|
- struct nvme_completion *cqe)
|
|
|
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
|
|
|
{
|
|
|
+ volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
|
|
|
struct request *req;
|
|
|
|
|
|
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
|
|
@@ -956,83 +957,81 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- nvmeq->cqe_seen = 1;
|
|
|
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
|
|
|
nvme_end_request(req, cqe->status, cqe->result);
|
|
|
}
|
|
|
|
|
|
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
|
|
|
- struct nvme_completion *cqe)
|
|
|
/*
 * nvme_complete_cqes (added): calls nvme_handle_cqe() for every index from
 * start up to, but not including, end, wrapping the index to 0 when it
 * reaches nvmeq->q_depth. Does nothing when start == end.
 */
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
|
|
|
{
|
|
|
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
|
|
|
- *cqe = nvmeq->cqes[nvmeq->cq_head];
|
|
|
+ while (start != end) {
|
|
|
+ nvme_handle_cqe(nvmeq, start);
|
|
|
+ if (++start == nvmeq->q_depth)
|
|
|
+ start = 0;
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- if (++nvmeq->cq_head == nvmeq->q_depth) {
|
|
|
- nvmeq->cq_head = 0;
|
|
|
- nvmeq->cq_phase = !nvmeq->cq_phase;
|
|
|
- }
|
|
|
- return true;
|
|
|
/*
 * nvme_update_cq_head (added): advances nvmeq->cq_head by one entry. When
 * the head reaches q_depth it wraps to 0 and cq_phase is inverted.
 */
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
|
|
|
+{
|
|
|
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
|
|
|
+ nvmeq->cq_head = 0;
|
|
|
+ nvmeq->cq_phase = !nvmeq->cq_phase;
|
|
|
}
|
|
|
- return false;
|
|
|
}
|
|
|
|
|
|
/*
 * nvme_process_cq (new form): stores the current cq_head in *start, then
 * advances cq_head over pending entries, stopping after an entry whose
 * command_id equals tag or once no entry is pending. The resulting cq_head
 * is stored in *end, and the CQ doorbell is rung if the head moved.
 * Returns true only when an entry matching tag was consumed.
 *
 * Entries are not completed here. The callers visible in this diff hold
 * cq_lock around the call and hand the [*start, *end) range to
 * nvme_complete_cqes() after unlocking; they pass tag == -1 when they are
 * not looking for a particular command.
 */
-static void nvme_process_cq(struct nvme_queue *nvmeq)
|
|
|
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
|
|
|
+ u16 *end, int tag)
|
|
|
{
|
|
|
- struct nvme_completion cqe;
|
|
|
- int consumed = 0;
|
|
|
+ bool found = false;
|
|
|
|
|
|
- while (nvme_read_cqe(nvmeq, &cqe)) {
|
|
|
- nvme_handle_cqe(nvmeq, &cqe);
|
|
|
- consumed++;
|
|
|
+ *start = nvmeq->cq_head;
|
|
|
+ while (!found && nvme_cqe_pending(nvmeq)) {
|
|
|
+ if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
|
|
|
+ found = true;
|
|
|
+ nvme_update_cq_head(nvmeq);
|
|
|
}
|
|
|
+ *end = nvmeq->cq_head;
|
|
|
|
|
|
- if (consumed)
|
|
|
+ if (*start != *end)
|
|
|
nvme_ring_cq_doorbell(nvmeq);
|
|
|
+ return found;
|
|
|
}
|
|
|
|
|
|
/*
 * Interrupt handler for one queue; data is the struct nvme_queue.
 * The new version reaps pending completion entries under cq_lock and then
 * completes them with the lock released. It returns IRQ_NONE when no entry
 * was consumed (start == end) and IRQ_HANDLED otherwise, replacing the
 * removed cqe_seen flag.
 */
static irqreturn_t nvme_irq(int irq, void *data)
|
|
|
{
|
|
|
- irqreturn_t result;
|
|
|
struct nvme_queue *nvmeq = data;
|
|
|
- spin_lock(&nvmeq->q_lock);
|
|
|
- nvme_process_cq(nvmeq);
|
|
|
- result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
|
|
|
- nvmeq->cqe_seen = 0;
|
|
|
- spin_unlock(&nvmeq->q_lock);
|
|
|
- return result;
|
|
|
+ u16 start, end;
|
|
|
+
|
|
|
+ spin_lock(&nvmeq->cq_lock);
|
|
|
+ nvme_process_cq(nvmeq, &start, &end, -1);
|
|
|
+ spin_unlock(&nvmeq->cq_lock);
|
|
|
+
|
|
|
+ if (start == end)
|
|
|
+ return IRQ_NONE;
|
|
|
+ nvme_complete_cqes(nvmeq, start, end);
|
|
|
+ return IRQ_HANDLED;
|
|
|
}
|
|
|
|
|
|
/*
 * Returns IRQ_WAKE_THREAD when a completion entry is pending at the
 * queue's current cq_head, IRQ_NONE otherwise. The check takes no lock
 * and consumes nothing.
 */
static irqreturn_t nvme_irq_check(int irq, void *data)
|
|
|
{
|
|
|
struct nvme_queue *nvmeq = data;
|
|
|
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
|
|
|
+ if (nvme_cqe_pending(nvmeq))
|
|
|
return IRQ_WAKE_THREAD;
|
|
|
return IRQ_NONE;
|
|
|
}
|
|
|
|
|
|
/*
 * Poll the completion queue for the command identified by tag.
 * Returns 0 straight away, without taking cq_lock, when nothing is
 * pending. Otherwise it reaps entries under cq_lock (spin_lock_irq),
 * completes the reaped range after unlocking, and returns nonzero if an
 * entry with command_id == tag was among them.
 */
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
|
|
|
{
|
|
|
- struct nvme_completion cqe;
|
|
|
- int found = 0, consumed = 0;
|
|
|
+ u16 start, end;
|
|
|
+ bool found;
|
|
|
|
|
|
- if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
|
|
|
+ if (!nvme_cqe_pending(nvmeq))
|
|
|
return 0;
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
- while (nvme_read_cqe(nvmeq, &cqe)) {
|
|
|
- nvme_handle_cqe(nvmeq, &cqe);
|
|
|
- consumed++;
|
|
|
-
|
|
|
- if (tag == cqe.command_id) {
|
|
|
- found = 1;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if (consumed)
|
|
|
- nvme_ring_cq_doorbell(nvmeq);
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_lock_irq(&nvmeq->cq_lock);
|
|
|
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
|
|
|
+ spin_unlock_irq(&nvmeq->cq_lock);
|
|
|
|
|
|
+ nvme_complete_cqes(nvmeq, start, end);
|
|
|
return found;
|
|
|
}
|
|
|
|
|
@@ -1053,9 +1052,9 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
|
|
|
c.common.opcode = nvme_admin_async_event;
|
|
|
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
+ spin_lock(&nvmeq->sq_lock);
|
|
|
__nvme_submit_cmd(nvmeq, &c);
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_unlock(&nvmeq->sq_lock);
|
|
|
}
|
|
|
|
|
|
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
|
|
@@ -1312,15 +1311,21 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
|
|
|
{
|
|
|
int vector;
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
+ spin_lock_irq(&nvmeq->cq_lock);
|
|
|
if (nvmeq->cq_vector == -1) {
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_unlock_irq(&nvmeq->cq_lock);
|
|
|
return 1;
|
|
|
}
|
|
|
vector = nvmeq->cq_vector;
|
|
|
nvmeq->dev->online_queues--;
|
|
|
nvmeq->cq_vector = -1;
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_unlock_irq(&nvmeq->cq_lock);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Ensure that nvme_queue_rq() sees ->cq_vector == -1 without
|
|
|
+ * having to grab the lock.
|
|
|
+ */
|
|
|
+ mb();
|
|
|
|
|
|
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
|
|
|
blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
|
|
@@ -1333,15 +1338,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
|
|
|
/*
 * Shuts the controller down when shutdown is true, otherwise disables it.
 * It then reaps whatever completion entries remain on queue 0
 * (dev->queues[0]) under cq_lock and completes them after unlocking.
 */
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
|
|
|
{
|
|
|
struct nvme_queue *nvmeq = &dev->queues[0];
|
|
|
+ u16 start, end;
|
|
|
|
|
|
if (shutdown)
|
|
|
nvme_shutdown_ctrl(&dev->ctrl);
|
|
|
else
|
|
|
nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
- nvme_process_cq(nvmeq);
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_lock_irq(&nvmeq->cq_lock);
|
|
|
+ nvme_process_cq(nvmeq, &start, &end, -1);
|
|
|
+ spin_unlock_irq(&nvmeq->cq_lock);
|
|
|
+
|
|
|
+ nvme_complete_cqes(nvmeq, start, end);
|
|
|
}
|
|
|
|
|
|
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
|
|
@@ -1399,7 +1407,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
|
|
|
|
|
|
nvmeq->q_dmadev = dev->dev;
|
|
|
nvmeq->dev = dev;
|
|
|
- spin_lock_init(&nvmeq->q_lock);
|
|
|
+ spin_lock_init(&nvmeq->sq_lock);
|
|
|
+ spin_lock_init(&nvmeq->cq_lock);
|
|
|
nvmeq->cq_head = 0;
|
|
|
nvmeq->cq_phase = 1;
|
|
|
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
|
|
@@ -1435,7 +1444,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
|
|
|
{
|
|
|
struct nvme_dev *dev = nvmeq->dev;
|
|
|
|
|
|
- spin_lock_irq(&nvmeq->q_lock);
|
|
|
+ spin_lock_irq(&nvmeq->cq_lock);
|
|
|
nvmeq->sq_tail = 0;
|
|
|
nvmeq->cq_head = 0;
|
|
|
nvmeq->cq_phase = 1;
|
|
@@ -1443,7 +1452,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
|
|
|
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
|
|
|
nvme_dbbuf_init(dev, nvmeq, qid);
|
|
|
dev->online_queues++;
|
|
|
- spin_unlock_irq(&nvmeq->q_lock);
|
|
|
+ spin_unlock_irq(&nvmeq->cq_lock);
|
|
|
}
|
|
|
|
|
|
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
|
|
@@ -1988,19 +1997,22 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
|
|
|
static void nvme_del_cq_end(struct request *req, blk_status_t error)
|
|
|
{
|
|
|
struct nvme_queue *nvmeq = req->end_io_data;
|
|
|
+ u16 start, end;
|
|
|
|
|
|
if (!error) {
|
|
|
unsigned long flags;
|
|
|
|
|
|
/*
|
|
|
- * We might be called with the AQ q_lock held
|
|
|
- * and the I/O queue q_lock should always
|
|
|
+ * We might be called with the AQ cq_lock held
|
|
|
+ * and the I/O queue cq_lock should always
|
|
|
* nest inside the AQ one.
|
|
|
*/
|
|
|
- spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
|
|
|
+ spin_lock_irqsave_nested(&nvmeq->cq_lock, flags,
|
|
|
SINGLE_DEPTH_NESTING);
|
|
|
- nvme_process_cq(nvmeq);
|
|
|
- spin_unlock_irqrestore(&nvmeq->q_lock, flags);
|
|
|
+ nvme_process_cq(nvmeq, &start, &end, -1);
|
|
|
+ spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
|
|
|
+
|
|
|
+ nvme_complete_cqes(nvmeq, start, end);
|
|
|
}
|
|
|
|
|
|
nvme_del_queue_end(req, error);
|
|
@@ -2488,6 +2500,15 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
/*
 * Callback scheduled from nvme_probe() through async_schedule(); data is
 * the struct nvme_dev. It resets the controller synchronously, waits for
 * ctrl.scan_work with flush_work(), and then drops the controller reference
 * that nvme_probe() takes with nvme_get_ctrl() just before scheduling.
 * The cookie argument is unused.
 */
+static void nvme_async_probe(void *data, async_cookie_t cookie)
|
|
|
+{
|
|
|
+ struct nvme_dev *dev = data;
|
|
|
+
|
|
|
+ nvme_reset_ctrl_sync(&dev->ctrl);
|
|
|
+ flush_work(&dev->ctrl.scan_work);
|
|
|
+ nvme_put_ctrl(&dev->ctrl);
|
|
|
+}
|
|
|
+
|
|
|
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|
|
{
|
|
|
int node, result = -ENOMEM;
|
|
@@ -2532,7 +2553,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|
|
|
|
|
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
|
|
|
|
|
|
- nvme_reset_ctrl(&dev->ctrl);
|
|
|
+ nvme_get_ctrl(&dev->ctrl);
|
|
|
+ async_schedule(nvme_async_probe, dev);
|
|
|
|
|
|
return 0;
|
|
|
|
|
@@ -2670,8 +2692,15 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
|
|
|
|
|
|
dev_info(dev->ctrl.device, "restart after slot reset\n");
|
|
|
pci_restore_state(pdev);
|
|
|
- nvme_reset_ctrl(&dev->ctrl);
|
|
|
- return PCI_ERS_RESULT_RECOVERED;
|
|
|
+ nvme_reset_ctrl_sync(&dev->ctrl);
|
|
|
+
|
|
|
+ switch (dev->ctrl.state) {
|
|
|
+ case NVME_CTRL_LIVE:
|
|
|
+ case NVME_CTRL_ADMIN_ONLY:
|
|
|
+ return PCI_ERS_RESULT_RECOVERED;
|
|
|
+ default:
|
|
|
+ return PCI_ERS_RESULT_DISCONNECT;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static void nvme_error_resume(struct pci_dev *pdev)
|
|
@@ -2704,6 +2733,8 @@ static const struct pci_device_id nvme_id_table[] = {
|
|
|
.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
|
|
|
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
|
|
|
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
|
|
|
+ { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
|
|
|
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
|
|
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
|
|
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
|
|
@@ -2718,6 +2749,8 @@ static const struct pci_device_id nvme_id_table[] = {
|
|
|
.driver_data = NVME_QUIRK_LIGHTNVM, },
|
|
|
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
|
|
|
.driver_data = NVME_QUIRK_LIGHTNVM, },
|
|
|
+ { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
|
|
|
+ .driver_data = NVME_QUIRK_LIGHTNVM, },
|
|
|
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
|