@@ -44,7 +44,7 @@

 #define NVME_MINORS (1U << MINORBITS)
 #define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 64
+#define NVME_AQ_DEPTH 256
 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
 #define ADMIN_TIMEOUT (admin_timeout * HZ)
@@ -152,6 +152,7 @@ struct nvme_cmd_info {
  */
 #define NVME_INT_PAGES 2
 #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size)
+#define NVME_INT_MASK 0x01

 /*
  * Will slightly overestimate the number of pages needed. This is OK
@@ -257,7 +258,7 @@ static void *iod_get_private(struct nvme_iod *iod)
  */
 static bool iod_should_kfree(struct nvme_iod *iod)
 {
-	return (iod->private & 0x01) == 0;
+	return (iod->private & NVME_INT_MASK) == 0;
 }

 /* Special values must be less than 0x1000 */
@@ -301,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
 static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
 {
-	struct request *req = ctx;
-
 	u32 result = le32_to_cpup(&cqe->result);
 	u16 status = le16_to_cpup(&cqe->status) >> 1;

@@ -311,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (status == NVME_SC_SUCCESS)
 		dev_warn(nvmeq->q_dmadev,
			"async event result %08x\n", result);
-
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
 }

 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -432,7 +429,6 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 {
 	unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
						sizeof(struct nvme_dsm_range);
-	unsigned long mask = 0;
 	struct nvme_iod *iod;

 	if (rq->nr_phys_segments <= NVME_INT_PAGES &&
@@ -440,9 +436,8 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 		struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);

 		iod = cmd->iod;
-		mask = 0x01;
 		iod_init(iod, size, rq->nr_phys_segments,
				(unsigned long) rq | 0x01);
+				(unsigned long) rq | NVME_INT_MASK);
 		return iod;
 	}

@@ -522,8 +517,6 @@ static void nvme_dif_remap(struct request *req,
 		return;

 	pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-	if (!pmap)
-		return;

 	p = pmap;
 	virt = bip_get_seed(bip);
@@ -645,12 +638,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	struct scatterlist *sg = iod->sg;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
-	int offset = offset_in_page(dma_addr);
+	u32 page_size = dev->page_size;
+	int offset = dma_addr & (page_size - 1);
 	__le64 *prp_list;
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma;
 	int nprps, i;
-	u32 page_size = dev->page_size;

 	length -= (page_size - offset);
 	if (length <= 0)
@@ -1028,18 +1021,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;

-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
 	if (IS_ERR(req))
 		return PTR_ERR(req);

 	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
-	nvme_set_info(cmd_info, req, async_req_completion);
+	nvme_set_info(cmd_info, NULL, async_req_completion);

 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = req->tag;

+	blk_mq_free_hctx_request(nvmeq->hctx, req);
 	return __nvme_submit_cmd(nvmeq, &c);
 }

@@ -1347,6 +1341,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);

+	if (!nvmeq->qid && nvmeq->dev->admin_q)
+		blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);

@@ -1378,8 +1375,6 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
-	if (!qid && dev->admin_q)
-		blk_mq_freeze_queue_start(dev->admin_q);

 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1583,6 +1578,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 	dev->admin_tagset.ops = &nvme_mq_admin_ops;
 	dev->admin_tagset.nr_hw_queues = 1;
 	dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
+	dev->admin_tagset.reserved_tags = 1;
 	dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 	dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
 	dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -1749,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_user_io io;
 	struct nvme_command c;
-	unsigned length, meta_len;
-	int status, i;
-	struct nvme_iod *iod, *meta_iod = NULL;
-	dma_addr_t meta_dma_addr;
-	void *meta, *uninitialized_var(meta_mem);
+	unsigned length, meta_len, prp_len;
+	int status, write;
+	struct nvme_iod *iod;
+	dma_addr_t meta_dma = 0;
+	void *meta = NULL;

 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;

-	if (meta_len && ((io.metadata & 3) || !io.metadata))
+	if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
 		return -EINVAL;
+	else if (meta_len && ns->ext) {
+		length += meta_len;
+		meta_len = 0;
+	}
+
+	write = io.opcode & 1;

 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
+		iod = nvme_map_user_pages(dev, write, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
@@ -1776,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	if (IS_ERR(iod))
 		return PTR_ERR(iod);

+	prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+	if (length != prp_len) {
+		status = -ENOMEM;
+		goto unmap;
+	}
+	if (meta_len) {
+		meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+						&meta_dma, GFP_KERNEL);
+		if (!meta) {
+			status = -ENOMEM;
+			goto unmap;
+		}
+		if (write) {
+			if (copy_from_user(meta, (void __user *)io.metadata,
+								meta_len)) {
+				status = -EFAULT;
+				goto unmap;
+			}
+		}
+	}
+
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
 	c.rw.flags = io.flags;
@@ -1787,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-
-	if (meta_len) {
-		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
-								meta_len);
-		if (IS_ERR(meta_iod)) {
-			status = PTR_ERR(meta_iod);
-			meta_iod = NULL;
-			goto unmap;
-		}
-
-		meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
-						&meta_dma_addr, GFP_KERNEL);
-		if (!meta_mem) {
-			status = -ENOMEM;
-			goto unmap;
-		}
-
-		if (io.opcode & 1) {
-			int meta_offset = 0;
-
-			for (i = 0; i < meta_iod->nents; i++) {
-				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-						meta_iod->sg[i].offset;
-				memcpy(meta_mem + meta_offset, meta,
-						meta_iod->sg[i].length);
-				kunmap_atomic(meta);
-				meta_offset += meta_iod->sg[i].length;
-			}
-		}
-
-		c.rw.metadata = cpu_to_le64(meta_dma_addr);
-	}
-
-	length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
 	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
 	c.rw.prp2 = cpu_to_le64(iod->first_dma);
-
-	if (length != (io.nblocks + 1) << ns->lba_shift)
-		status = -ENOMEM;
-	else
-		status = nvme_submit_io_cmd(dev, ns, &c, NULL);
-
-	if (meta_len) {
-		if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
-			int meta_offset = 0;
-
-			for (i = 0; i < meta_iod->nents; i++) {
-				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-						meta_iod->sg[i].offset;
-				memcpy(meta, meta_mem + meta_offset,
-						meta_iod->sg[i].length);
-				kunmap_atomic(meta);
-				meta_offset += meta_iod->sg[i].length;
-			}
-		}
-
-		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
-				meta_dma_addr);
-	}
-
+	c.rw.metadata = cpu_to_le64(meta_dma);
+	status = nvme_submit_io_cmd(dev, ns, &c, NULL);
 unmap:
-	nvme_unmap_user_pages(dev, io.opcode & 1, iod);
+	nvme_unmap_user_pages(dev, write, iod);
 	nvme_free_iod(dev, iod);
-
-	if (meta_iod) {
-		nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
-		nvme_free_iod(dev, meta_iod);
+	if (meta) {
+		if (status == NVME_SC_SUCCESS && !write) {
+			if (copy_to_user((void __user *)io.metadata, meta,
+								meta_len))
+				status = -EFAULT;
+		}
+		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
 	}
-
 	return status;
 }

@@ -2018,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
 	dma_addr_t dma_addr;
-	int lbaf, pi_type, old_ms;
+	u8 lbaf, pi_type;
+	u16 old_ms;
 	unsigned short bs;

 	id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
@@ -2039,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
 	ns->lba_shift = id->lbaf[lbaf].ds;
 	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

 	/*
	 * If identify namespace failed, use default 512 byte block size so
@@ -2055,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
				ns->ms != old_ms ||
				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+				(ns->ms && ns->ext)))
 		blk_integrity_unregister(disk);

 	ns->pi_type = pi_type;
 	blk_queue_logical_block_size(ns->queue, bs);

 	if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
-							!(id->flbas & NVME_NS_FLBAS_META_EXT))
+							!ns->ext)
 		nvme_init_integrity(ns);

 	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
@@ -2334,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
-	dev->event_limit = min(ctrl->aerl + 1, 8);
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2881,6 +2851,7 @@ static int nvme_dev_start(struct nvme_dev *dev)

 	nvme_set_irq_hints(dev);

+	dev->event_limit = 1;
 	return result;

 free_tags:
@@ -3166,8 +3137,10 @@ static int __init nvme_init(void)
 	nvme_char_major = result;

 	nvme_class = class_create(THIS_MODULE, "nvme");
-	if (!nvme_class)
+	if (IS_ERR(nvme_class)) {
+		result = PTR_ERR(nvme_class);
 		goto unregister_chrdev;
+	}

 	result = pci_register_driver(&nvme_driver);
 	if (result)