
Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Final round of fixes for this merge window - some of this has come up
  after the initial pull request, and some of it was put in a post-merge
  branch before the merge window.

  This contains:

   - Fix for a bad check for an error on dma mapping in the mtip32xx
     driver, from Alexey Khoroshilov.

   - A set of fixes for lightnvm, from Javier, Matias, and Wenwei.

   - An NVMe completion record corruption fix from Marta, ensuring that
     we read things in the right order.

   - Two writeback fixes from Tejun, marked for stable@ as well.

   - A blk-mq sw queue iterator fix from Thomas, fixing an oops for
     sparse CPU maps.  They hit this in the hot plug/unplug rework"

* 'for-linus' of git://git.kernel.dk/linux-block:
  nvme: avoid cqe corruption when update at the same time as read
  writeback, cgroup: fix use of the wrong bdi_writeback which mismatches the inode
  writeback, cgroup: fix premature wb_put() in locked_inode_to_wb_and_lock_list()
  blk-mq: Use proper cpumask iterator
  mtip32xx: fix checks for dma mapping errors
  lightnvm: do not load L2P table if not supported
  lightnvm: do not reserve lun on l2p loading
  nvme: lightnvm: return ppa completion status
  lightnvm: add a bitmap of luns
  lightnvm: specify target's logical address area
  null_blk: add lightnvm null_blk device to the nullb_list
Linus Torvalds, 9 years ago (commit 1d02369dba)

block/blk-mq-sysfs.c   + 4 - 2

@@ -416,12 +416,14 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
 static void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_ctx *ctx;
-	int i;
+	int cpu;
 
 	kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-	queue_for_each_ctx(q, ctx, i)
+	for_each_possible_cpu(cpu) {
+		ctx = per_cpu_ptr(q->queue_ctx, cpu);
 		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
+	}
 }
 
 int blk_mq_register_disk(struct gendisk *disk)

block/blk-mq.c   + 2 - 1

@@ -1798,11 +1798,12 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	/*
 	 * Map software to hardware queues
 	 */
-	queue_for_each_ctx(q, ctx, i) {
+	for_each_possible_cpu(i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
 		if (!cpumask_test_cpu(i, online_mask))
 			continue;
 
+		ctx = per_cpu_ptr(q->queue_ctx, i);
 		hctx = q->mq_ops->map_queue(q, i);
 
 		cpumask_set_cpu(i, hctx->cpumask);
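
Note on the two blk-mq hunks above: per-CPU queue contexts are only set up for CPUs in cpu_possible_mask, so the old queue_for_each_ctx() walk over plain indices 0..nr_queues-1 could hand per_cpu_ptr() a CPU number that does not exist when the possible-CPU map has holes, and dereferencing the result oopses. A minimal sketch of the broken and the fixed iteration, assuming a context area allocated with alloc_percpu():

/* Illustrative sketch only; 'queue_ctx' stands in for q->queue_ctx. */
struct blk_mq_ctx __percpu *queue_ctx;
struct blk_mq_ctx *ctx;
int cpu;

/*
 * Broken with sparse maps: nothing guarantees every index in
 * 0..nr_queues-1 is a possible CPU, so per_cpu_ptr() may be handed a
 * CPU number whose per-CPU area was never set up.
 *
 *	for (cpu = 0; cpu < nr_queues; cpu++)
 *		ctx = per_cpu_ptr(queue_ctx, cpu);
 */

/* Fixed: only visit CPUs that are actually in cpu_possible_mask. */
for_each_possible_cpu(cpu) {
	ctx = per_cpu_ptr(queue_ctx, cpu);
	/* ... initialise or map ctx ... */
}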

drivers/block/mtip32xx/mtip32xx.c   + 2 - 2

@@ -2051,7 +2051,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
 					 outbuf,
 					 taskout,
 					 DMA_TO_DEVICE);
-		if (outbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}
@@ -2068,7 +2068,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
 		inbuf_dma = pci_map_single(dd->pdev,
 					 inbuf,
 					 taskin, DMA_FROM_DEVICE);
-		if (inbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}
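
Note on the mtip32xx hunks above: a streaming DMA mapping reports failure through the mapping-error helper, not by returning 0, and on some platforms 0 is a perfectly valid bus address, so the old "== 0" test could both miss real failures and reject good mappings. A generic sketch of the intended pattern using the plain DMA API (the driver itself goes through the older pci_* wrappers, which wrap these same calls); 'dev', 'buf' and 'len' are placeholders:

dma_addr_t addr;

addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, addr))
	return -ENOMEM;	/* never hand 'addr' to the hardware on failure */

/* ... program the device with 'addr', wait for completion ... */

dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);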

drivers/block/null_blk.c   + 2 - 1

@@ -742,10 +742,11 @@ static int null_add_dev(void)
 
 	add_disk(disk);
 
+done:
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
-done:
+
 	return 0;
 
 out_cleanup_lightnvm:

drivers/lightnvm/core.c   + 7 - 0

@@ -464,8 +464,13 @@ static int nvm_core_init(struct nvm_dev *dev)
 	dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
 
 	dev->total_secs = dev->nr_luns * dev->sec_per_lun;
+	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+					sizeof(unsigned long), GFP_KERNEL);
+	if (!dev->lun_map)
+		return -ENOMEM;
 	INIT_LIST_HEAD(&dev->online_targets);
 	mutex_init(&dev->mlock);
+	spin_lock_init(&dev->lock);
 
 	return 0;
 }
@@ -585,6 +590,7 @@ int nvm_register(struct request_queue *q, char *disk_name,
 
 	return 0;
 err_init:
+	kfree(dev->lun_map);
 	kfree(dev);
 	return ret;
 }
@@ -607,6 +613,7 @@ void nvm_unregister(char *disk_name)
 	up_write(&nvm_lock);
 
 	nvm_exit(dev);
+	kfree(dev->lun_map);
 	kfree(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);

drivers/lightnvm/gennvm.c   + 83 - 1

@@ -20,6 +20,68 @@
 
 #include "gennvm.h"
 
+static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area, *prev, *next;
+	sector_t begin = 0;
+	sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9;
+
+	if (len > max_sectors)
+		return -EINVAL;
+
+	area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL);
+	if (!area)
+		return -ENOMEM;
+
+	prev = NULL;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(next, &gn->area_list, list) {
+		if (begin + len > next->begin) {
+			begin = next->end;
+			prev = next;
+			continue;
+		}
+		break;
+	}
+
+	if ((begin + len) > max_sectors) {
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return -EINVAL;
+	}
+
+	area->begin = *lba = begin;
+	area->end = begin + len;
+
+	if (prev) /* insert into sorted order */
+		list_add(&area->list, &prev->list);
+	else
+		list_add(&area->list, &gn->area_list);
+	spin_unlock(&dev->lock);
+
+	return 0;
+}
+
+static void gennvm_put_area(struct nvm_dev *dev, sector_t begin)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(area, &gn->area_list, list) {
+		if (area->begin != begin)
+			continue;
+
+		list_del(&area->list);
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return;
+	}
+	spin_unlock(&dev->lock);
+}
+
 static void gennvm_blocks_free(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn = dev->mp;
@@ -195,7 +257,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 		}
 	}
 
-	if (dev->ops->get_l2p_tbl) {
+	if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) {
 		ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
 							gennvm_block_map, dev);
 		if (ret) {
@@ -229,6 +291,7 @@ static int gennvm_register(struct nvm_dev *dev)
 
 	gn->dev = dev;
 	gn->nr_luns = dev->nr_luns;
+	INIT_LIST_HEAD(&gn->area_list);
 	dev->mp = gn;
 
 	ret = gennvm_luns_init(dev, gn);
@@ -419,10 +482,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
 	return nvm_erase_ppa(dev, &addr, 1);
 }
 
+static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid)
+{
+	return test_and_set_bit(lunid, dev->lun_map);
+}
+
+static void gennvm_release_lun(struct nvm_dev *dev, int lunid)
+{
+	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+}
+
 static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
 {
 	struct gen_nvm *gn = dev->mp;
 
+	if (unlikely(lunid >= dev->nr_luns))
+		return NULL;
+
 	return &gn->luns[lunid].vlun;
 }
 
@@ -464,7 +540,13 @@ static struct nvmm_type gennvm = {
 	.erase_blk		= gennvm_erase_blk,
 
 	.get_lun		= gennvm_get_lun,
+	.reserve_lun		= gennvm_reserve_lun,
+	.release_lun		= gennvm_release_lun,
 	.lun_info_print		= gennvm_lun_info_print,
+
+	.get_area		= gennvm_get_area,
+	.put_area		= gennvm_put_area,
+
 };
 
 static int __init gennvm_module_init(void)
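
Note on gennvm_get_area() above: it is a small first-fit allocator. Reserved areas live on gn->area_list sorted by start sector; the walk pushes the candidate start past every area it would collide with, stops at the first gap large enough for the request, and links the new area in behind the last area it skipped so the list stays sorted. A stand-alone sketch of the same first-fit idea over a singly linked, begin-sorted interval list (hypothetical userspace types, not the gennvm structures):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical interval type; the kernel code uses struct gennvm_area. */
struct area {
	unsigned long begin, end;	/* end is exclusive */
	struct area *next;		/* list kept sorted by 'begin' */
};

/* First-fit: reserve the lowest gap of at least 'len' sectors, or NULL. */
static struct area *get_area(struct area **head, unsigned long len,
			     unsigned long max_sectors)
{
	unsigned long begin = 0;
	struct area *a, *prev = NULL, *n;

	for (a = *head; a; prev = a, a = a->next) {
		if (begin + len > a->begin)
			begin = a->end;		/* would overlap: skip past it */
		else
			break;			/* the gap before 'a' fits */
	}
	if (begin + len > max_sectors)
		return NULL;			/* no room left */

	n = malloc(sizeof(*n));
	if (!n)
		return NULL;
	n->begin = begin;
	n->end = begin + len;

	/* insert behind the last area we skipped, keeping the list sorted */
	if (prev) {
		n->next = prev->next;
		prev->next = n;
	} else {
		n->next = *head;
		*head = n;
	}
	return n;
}

int main(void)
{
	struct area *head = NULL;
	struct area *a = get_area(&head, 100, 1000);	/* gets [0, 100)   */
	struct area *b = get_area(&head, 50, 1000);	/* gets [100, 150) */

	if (a && b)
		printf("a: %lu-%lu  b: %lu-%lu\n",
		       a->begin, a->end, b->begin, b->end);
	return 0;
}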

drivers/lightnvm/gennvm.h   + 6 - 0

@@ -39,8 +39,14 @@ struct gen_nvm {
 
 	int nr_luns;
 	struct gen_lun *luns;
+	struct list_head area_list;
 };
 
+struct gennvm_area {
+	struct list_head list;
+	sector_t begin;
+	sector_t end;	/* end is excluded */
+};
 #define gennvm_for_each_lun(bm, lun, i) \
 		for ((i) = 0, lun = &(bm)->luns[0]; \
 			(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])

drivers/lightnvm/rrpc.c   + 78 - 31

@@ -965,25 +965,11 @@ static void rrpc_requeue(struct work_struct *work)
 
 static void rrpc_gc_free(struct rrpc *rrpc)
 {
-	struct rrpc_lun *rlun;
-	int i;
-
 	if (rrpc->krqd_wq)
 		destroy_workqueue(rrpc->krqd_wq);
 
 	if (rrpc->kgc_wq)
 		destroy_workqueue(rrpc->kgc_wq);
-
-	if (!rrpc->luns)
-		return;
-
-	for (i = 0; i < rrpc->nr_luns; i++) {
-		rlun = &rrpc->luns[i];
-
-		if (!rlun->blocks)
-			break;
-		vfree(rlun->blocks);
-	}
 }
 
 static int rrpc_gc_init(struct rrpc *rrpc)
@@ -1053,8 +1039,11 @@ static int rrpc_map_init(struct rrpc *rrpc)
 {
 	struct nvm_dev *dev = rrpc->dev;
 	sector_t i;
+	u64 slba;
 	int ret;
 
+	slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9);
+
 	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
 	if (!rrpc->trans_map)
 		return -ENOMEM;
@@ -1076,7 +1065,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
 		return 0;
 
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update,
+	ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update,
 									rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1086,7 +1075,6 @@ static int rrpc_map_init(struct rrpc *rrpc)
 	return 0;
 }
 
-
 /* Minimum pages needed within a lun */
 #define PAGE_POOL_SIZE 16
 #define ADDR_POOL_SIZE 64
@@ -1141,6 +1129,23 @@ static void rrpc_core_free(struct rrpc *rrpc)
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_lun *lun;
+	struct rrpc_lun *rlun;
+	int i;
+
+	if (!rrpc->luns)
+		return;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		lun = rlun->parent;
+		if (!lun)
+			break;
+		dev->mt->release_lun(dev, lun->id);
+		vfree(rlun->blocks);
+	}
+
 	kfree(rrpc->luns);
 }
 
@@ -1148,7 +1153,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 {
 	struct nvm_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun;
-	int i, j;
+	int i, j, ret = -EINVAL;
 
 	if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
 		pr_err("rrpc: number of pages per block too high.");
@@ -1164,25 +1169,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 
 	/* 1:1 mapping */
 	for (i = 0; i < rrpc->nr_luns; i++) {
-		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
-
-		rlun = &rrpc->luns[i];
-		rlun->rrpc = rrpc;
-		rlun->parent = lun;
-		INIT_LIST_HEAD(&rlun->prio_list);
-		INIT_LIST_HEAD(&rlun->open_list);
-		INIT_LIST_HEAD(&rlun->closed_list);
+		int lunid = lun_begin + i;
+		struct nvm_lun *lun;
 
-		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
-		spin_lock_init(&rlun->lock);
+		if (dev->mt->reserve_lun(dev, lunid)) {
+			pr_err("rrpc: lun %u is already allocated\n", lunid);
+			goto err;
+		}
 
-		rrpc->total_blocks += dev->blks_per_lun;
-		rrpc->nr_sects += dev->sec_per_lun;
+		lun = dev->mt->get_lun(dev, lunid);
+		if (!lun)
+			goto err;
 
+		rlun = &rrpc->luns[i];
+		rlun->parent = lun;
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
 						rrpc->dev->blks_per_lun);
-		if (!rlun->blocks)
+		if (!rlun->blocks) {
+			ret = -ENOMEM;
 			goto err;
+		}
 
 		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
 			struct rrpc_block *rblk = &rlun->blocks[j];
@@ -1193,11 +1199,43 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 			INIT_LIST_HEAD(&rblk->prio);
 			spin_lock_init(&rblk->lock);
 		}
+
+		rlun->rrpc = rrpc;
+		INIT_LIST_HEAD(&rlun->prio_list);
+		INIT_LIST_HEAD(&rlun->open_list);
+		INIT_LIST_HEAD(&rlun->closed_list);
+
+		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+		spin_lock_init(&rlun->lock);
+
+		rrpc->total_blocks += dev->blks_per_lun;
+		rrpc->nr_sects += dev->sec_per_lun;
+
 	}
 
 	return 0;
 err:
-	return -ENOMEM;
+	return ret;
+}
+
+/* returns 0 on success and stores the beginning address in *begin */
+static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+	sector_t size = rrpc->nr_sects * dev->sec_size;
+
+	size >>= 9;
+
+	return mt->get_area(dev, begin, size);
+}
+
+static void rrpc_area_free(struct rrpc *rrpc)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+
+	mt->put_area(dev, rrpc->soffset);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
@@ -1206,6 +1244,7 @@ static void rrpc_free(struct rrpc *rrpc)
 	rrpc_map_free(rrpc);
 	rrpc_core_free(rrpc);
 	rrpc_luns_free(rrpc);
+	rrpc_area_free(rrpc);
 
 	kfree(rrpc);
 }
@@ -1327,6 +1366,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
 	struct rrpc *rrpc;
+	sector_t soffset;
 	int ret;
 
 	if (!(dev->identity.dom & NVM_RSP_L2P)) {
@@ -1352,6 +1392,13 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	/* simple round-robin strategy */
 	atomic_set(&rrpc->next_lun, -1);
 
+	ret = rrpc_area_init(rrpc, &soffset);
+	if (ret < 0) {
+		pr_err("nvm: rrpc: could not initialize area\n");
+		return ERR_PTR(ret);
+	}
+	rrpc->soffset = soffset;
+
 	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");
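
Note on the rrpc changes above: the target now reserves a logical area for itself. rrpc_area_init() sizes it in bytes (nr_sects * sec_size) and converts to 512-byte sectors with >> 9, the returned offset is kept in rrpc->soffset, and rrpc_map_init() converts that offset back into a device-sector LBA before requesting the L2P table, so only the target's own slice is loaded. The shift is just a unit conversion, roughly (example values, not rrpc code):

/* rrpc->soffset is in 512-byte sectors; get_l2p_tbl() wants device
 * sectors of sec_size bytes each. */
unsigned int sec_size = 4096;			/* example device sector size */
sector_t soffset = 2048;			/* area offset, 512B sectors   */
u64 slba = soffset >> (ilog2(sec_size) - 9);	/* 2048 / 8 = 256 dev sectors */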

drivers/lightnvm/rrpc.h   + 1 - 0

@@ -97,6 +97,7 @@ struct rrpc {
 	struct nvm_dev *dev;
 	struct gendisk *disk;
 
+	sector_t soffset; /* logical sector offset */
 	u64 poffset; /* physical page offset */
 	int lun_offset;
 

drivers/nvme/host/lightnvm.c   + 15 - 2

@@ -146,6 +146,14 @@ struct nvme_nvm_command {
 	};
 };
 
+struct nvme_nvm_completion {
+	__le64	result;		/* Used by LightNVM to return ppa completions */
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
 #define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__u16			num_pairs;
@@ -507,6 +515,10 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
 static void nvme_nvm_end_io(struct request *rq, int error)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
+	struct nvme_nvm_completion *cqe = rq->special;
+
+	if (cqe)
+		rqd->ppa_status = le64_to_cpu(cqe->result);
 
 	nvm_end_io(rqd, error);
 
@@ -526,7 +538,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
-	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
+	cmd = kzalloc(sizeof(struct nvme_nvm_command) +
+				sizeof(struct nvme_nvm_completion), GFP_KERNEL);
 	if (!cmd) {
 		blk_mq_free_request(rq);
 		return -ENOMEM;
@@ -545,7 +558,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	rq->cmd = (unsigned char *)cmd;
 	rq->cmd_len = sizeof(struct nvme_nvm_command);
-	rq->special = (void *)0;
+	rq->special = cmd + 1;
 
 	rq->end_io_data = rqd;
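
Note on the hunks above: the lightnvm path needs the 64-bit result word from the completion entry (the per-ppa completion status), which the normal request path discards. The command and a struct nvme_nvm_completion are now carved out of a single kzalloc(), rq->special points at the trailing completion record, the NVMe completion handler (see the pci.c hunk below) copies the CQE into req->special for driver-private requests, and nvme_nvm_end_io() reads the result back out. A minimal sketch of the one-allocation idiom, with placeholder struct names:

/* Sketch only: 'struct cmd' and 'struct cpl' are placeholders. */
struct cmd *cmd;
struct cpl *cpl;

cmd = kzalloc(sizeof(*cmd) + sizeof(*cpl), GFP_KERNEL);
if (!cmd)
	return -ENOMEM;

cpl = (struct cpl *)(cmd + 1);	/* first byte past the command */
rq->special = cpl;		/* completion handler fills this in */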
 

drivers/nvme/host/pci.c   + 13 - 11

@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req)
 	blk_mq_end_request(req, error);
 }
 
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
+		u16 phase)
+{
+	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+}
+
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
 	u16 head, phase;
@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 	head = nvmeq->cq_head;
 	phase = nvmeq->cq_phase;
 
-	for (;;) {
+	while (nvme_cqe_valid(nvmeq, head, phase)) {
 		struct nvme_completion cqe = nvmeq->cqes[head];
-		u16 status = le16_to_cpu(cqe.status);
 		struct request *req;
 
-		if ((status & 1) != phase)
-			break;
 		if (++head == nvmeq->q_depth) {
 			head = 0;
 			phase = !phase;
@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
 			memcpy(req->special, &cqe, sizeof(cqe));
-		blk_mq_complete_request(req, status >> 1);
+		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
 
@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
-	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
-		return IRQ_NONE;
-	return IRQ_WAKE_THREAD;
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
-	    nvmeq->cq_phase) {
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
 		spin_unlock_irq(&nvmeq->q_lock);
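
Note on the pci.c change above: the old loop copied the whole CQE with a struct assignment and only afterwards tested the phase bit, so the fields of an entry the controller was still writing could be consumed as a mix of old and new data. nvme_cqe_valid() tests the phase bit in place first, and the entry is copied only once it is known to have been posted. The resulting polling shape, roughly:

/* Sketch of the consumer shape after the fix (not the full handler). */
while (nvme_cqe_valid(nvmeq, head, phase)) {
	/* the phase bit says this entry has been posted; copy it now */
	struct nvme_completion cqe = nvmeq->cqes[head];

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;		/* ring wrapped: expected phase flips */
	}
	/* ... look up the request from cqe.command_id and complete it ... */
}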

fs/fs-writeback.c   + 24 - 13

@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode)
 		wb_get(wb);
 		spin_unlock(&inode->i_lock);
 		spin_lock(&wb->list_lock);
-		wb_put(wb);		/* not gonna deref it anymore */
 
 		/* i_wb may have changed inbetween, can't use inode_to_wb() */
-		if (likely(wb == inode->i_wb))
-			return wb;	/* @inode already has ref */
+		if (likely(wb == inode->i_wb)) {
+			wb_put(wb);	/* @inode already has ref */
+			return wb;
+		}
 
 		spin_unlock(&wb->list_lock);
+		wb_put(wb);
 		cpu_relax();
 		spin_lock(&inode->i_lock);
 	}
@@ -1337,10 +1339,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
  * and does more profound writeback list handling in writeback_sb_inodes().
  */
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+				  struct writeback_control *wbc)
 {
+	struct bdi_writeback *wb;
 	int ret = 0;
 
 	spin_lock(&inode->i_lock);
@@ -1378,7 +1380,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	ret = __writeback_single_inode(inode, wbc);
 
 	wbc_detach_inode(wbc);
-	spin_lock(&wb->list_lock);
+
+	wb = inode_to_wb_and_lock_list(inode);
 	spin_lock(&inode->i_lock);
 	/*
 	 * If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1453,6 +1456,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		struct bdi_writeback *tmp_wb;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -1543,15 +1547,23 @@ static long writeback_sb_inodes(struct super_block *sb,
 			cond_resched();
 		}
 
-
-		spin_lock(&wb->list_lock);
+		/*
+		 * Requeue @inode if still dirty.  Be careful as @inode may
+		 * have been switched to another wb in the meantime.
+		 */
+		tmp_wb = inode_to_wb_and_lock_list(inode);
 		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY_ALL))
 			wrote++;
-		requeue_inode(inode, wb, &wbc);
+		requeue_inode(inode, tmp_wb, &wbc);
 		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
 
+		if (unlikely(tmp_wb != wb)) {
+			spin_unlock(&tmp_wb->list_lock);
+			spin_lock(&wb->list_lock);
+		}
+
 		/*
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
@@ -2338,7 +2350,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2350,7 +2361,7 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	return writeback_single_inode(inode, wb, &wbc);
+	return writeback_single_inode(inode, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -2367,7 +2378,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+	return writeback_single_inode(inode, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 

include/linux/blk-mq.h   + 0 - 14

@@ -263,22 +263,8 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
 	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
 	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
 
-#define queue_for_each_ctx(q, ctx, i)					\
-	for ((i) = 0; (i) < (q)->nr_queues &&				\
-	     ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
-
 #define hctx_for_each_ctx(hctx, ctx, i)					\
 	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
 	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
 
-#define blk_ctx_sum(q, sum)						\
-({									\
-	struct blk_mq_ctx *__x;						\
-	unsigned int __ret = 0, __i;					\
-									\
-	queue_for_each_ctx((q), __x, __i)				\
-		__ret += sum;						\
-	__ret;								\
-})
-
 #endif

include/linux/lightnvm.h   + 14 - 0

@@ -242,6 +242,7 @@ struct nvm_rq {
 	uint16_t nr_pages;
 	uint16_t flags;
 
+	u64 ppa_status; /* ppa media status */
 	int error;
 };
 
@@ -346,6 +347,7 @@ struct nvm_dev {
 	int nr_luns;
 	unsigned max_pages_per_blk;
 
+	unsigned long *lun_map;
 	void *ppalist_pool;
 
 	struct nvm_id identity;
@@ -355,6 +357,7 @@ struct nvm_dev {
 	char name[DISK_NAME_LEN];
 
 	struct mutex mlock;
+	spinlock_t lock;
 };
 
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
@@ -465,8 +468,13 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
+typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
+typedef void (nvmm_release_lun)(struct nvm_dev *, int);
 typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
+typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
+typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+
 struct nvmm_type {
 	const char *name;
 	unsigned int version[3];
@@ -488,9 +496,15 @@ struct nvmm_type {
 
 	/* Configuration management */
 	nvmm_get_lun_fn *get_lun;
+	nvmm_reserve_lun *reserve_lun;
+	nvmm_release_lun *release_lun;
 
 	/* Statistics */
 	nvmm_lun_info_print_fn *lun_info_print;
+
+	nvmm_get_area_fn *get_area;
+	nvmm_put_area_fn *put_area;
+
 	struct list_head list;
 };