Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes from the past few weeks that should go into 4.5.
  This contains:

   - Overflow fix for sysfs discard show function from Alan.

   - A stacking limit init fix for max_dev_sectors, so we don't end up
     artificially capping some use cases.  From Keith.

   - Have blk-mq properly end unstarted requests on a dying queue, instead
     of pushing that to the driver.  From Keith.

   - NVMe:
        - Update to Kconfig description for NVME_SCSI, since it was
          vague and having it on is important for some SUSE distros.
          From Christoph.
        - Set of fixes from Keith, around surprise removal. Also kills
          the no-merge flag, so it supports merging.

   - Set of fixes for lightnvm from Matias, Javier, and Wenwei.

   - Fix a null_blk oops when lightnvm is requested but not available.  From
     Matias.

   - Copy-to-user EINTR fix from Hannes, fixing a case where SG_IO fails
     if interrupted by a signal.

   - Two floppy fixes from Jiri, fixing signal handling and blocking
     open.

   - A use-after-free fix for O_DIRECT, from Mike Krinkin.

   - A block module ref count fix from Roman Pen.

   - An fs IO wait accounting fix for O_DSYNC from Stephane Gasparini.

   - Smaller realloc fix for xen-blkfront from Bob Liu.

   - Removal of an unused struct member in the deadline IO scheduler,
     from Tahsin.

   - Also from Tahsin, properly initialize inode struct members
     associated with cgroup writeback, if enabled.

   - From Tejun, ensure that we keep the superblock pinned during cgroup
     writeback"

* 'for-linus' of git://git.kernel.dk/linux-block: (25 commits)
  blk: fix overflow in queue_discard_max_hw_show
  writeback: initialize inode members that track writeback history
  writeback: keep superblock pinned during cgroup writeback association switches
  bio: return EINTR if copying to user space got interrupted
  NVMe: Rate limit nvme IO warnings
  NVMe: Poll device while still active during remove
  NVMe: Requeue requests on suspended queues
  NVMe: Allow request merges
  NVMe: Fix io incapable return values
  blk-mq: End unstarted requests on dying queue
  block: Initialize max_dev_sectors to 0
  null_blk: oops when initializing without lightnvm
  block: fix module reference leak on put_disk() call for cgroups throttle
  nvme: fix Kconfig description for BLK_DEV_NVME_SCSI
  kernel/fs: fix I/O wait not accounted for RW O_DSYNC
  floppy: refactor open() flags handling
  lightnvm: allow to force mm initialization
  lightnvm: check overflow and correct mlc pairs
  lightnvm: fix request intersection locking in rrpc
  lightnvm: warn if irqs are disabled in lock laddr
  ...
Linus Torvalds, 9 years ago
commit 2850713576

+ 6 - 3
block/bio.c

@@ -874,7 +874,7 @@ int submit_bio_wait(int rw, struct bio *bio)
 	bio->bi_private = &ret;
 	bio->bi_end_io = submit_bio_wait_endio;
 	submit_bio(rw, bio);
-	wait_for_completion(&ret.event);
+	wait_for_completion_io(&ret.event);
 
 	return ret.error;
 }
@@ -1090,9 +1090,12 @@ int bio_uncopy_user(struct bio *bio)
 	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
 		/*
 		 * if we're in a workqueue, the request is orphaned, so
-		 * don't copy into a random user address space, just free.
+		 * don't copy into a random user address space, just free
+		 * and return -EINTR so user space doesn't expect any data.
 		 */
-		if (current->mm && bio_data_dir(bio) == READ)
+		if (!current->mm)
+			ret = -EINTR;
+		else if (bio_data_dir(bio) == READ)
 			ret = bio_copy_to_iter(bio, bmd->iter);
 		if (bmd->is_our_pages)
 			bio_free_pages(bio);
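
With the bio_uncopy_user() change above, an SG_IO read whose request was orphaned by a signal now fails with EINTR instead of returning a buffer that was never filled in. Below is a minimal userspace sketch of the matching retry pattern; the device node and the INQUIRY command are illustrative assumptions, not part of this patch.

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <scsi/sg.h>

	int main(void)
	{
		unsigned char cdb[6] = { 0x12, 0, 0, 0, 36, 0 };  /* INQUIRY, 36 bytes */
		unsigned char buf[36], sense[32];
		struct sg_io_hdr hdr;
		int fd, ret;

		fd = open("/dev/sg0", O_RDWR);          /* example device node */
		if (fd < 0)
			return 1;

		memset(&hdr, 0, sizeof(hdr));
		hdr.interface_id = 'S';
		hdr.dxfer_direction = SG_DXFER_FROM_DEV;
		hdr.cmd_len = sizeof(cdb);
		hdr.cmdp = cdb;
		hdr.dxferp = buf;
		hdr.dxfer_len = sizeof(buf);
		hdr.sbp = sense;
		hdr.mx_sb_len = sizeof(sense);
		hdr.timeout = 5000;                     /* milliseconds */

		do {
			ret = ioctl(fd, SG_IO, &hdr);   /* may now fail with EINTR */
		} while (ret < 0 && errno == EINTR);    /* don't trust buf on EINTR */

		if (ret == 0)
			printf("vendor: %.8s\n", (char *)buf + 8);
		return ret < 0;
	}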

+ 9 - 0
block/blk-cgroup.c

@@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 {
 	struct gendisk *disk;
 	struct blkcg_gq *blkg;
+	struct module *owner;
 	unsigned int major, minor;
 	int key_len, part, ret;
 	char *body;
@@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	if (!disk)
 		return -ENODEV;
 	if (part) {
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		return -ENODEV;
 	}
 
@@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		ret = PTR_ERR(blkg);
 		rcu_read_unlock();
 		spin_unlock_irq(disk->queue->queue_lock);
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		/*
 		 * If queue was bypassing, we should retry.  Do so after a
 		 * short msleep().  It isn't strictly necessary but queue
@@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+	struct module *owner;
+
 	spin_unlock_irq(ctx->disk->queue->queue_lock);
 	rcu_read_unlock();
+	owner = ctx->disk->fops->owner;
 	put_disk(ctx->disk);
+	module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 

+ 4 - 2
block/blk-mq.c

@@ -599,8 +599,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		 * If a request wasn't started before the queue was
 		 * marked dying, kill it here or it'll go unnoticed.
 		 */
-		if (unlikely(blk_queue_dying(rq->q)))
-			blk_mq_complete_request(rq, -EIO);
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_end_request(rq, rq->errors);
+		}
 		return;
 	}
 

+ 2 - 2
block/blk-settings.c

@@ -91,8 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->virt_boundary_mask = 0;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
-		BLK_SAFE_MAX_SECTORS;
+	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->max_dev_sectors = 0;
 	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;

+ 2 - 3
block/blk-sysfs.c

@@ -147,10 +147,9 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
 {
-	unsigned long long val;
 
-	val = q->limits.max_hw_discard_sectors << 9;
-	return sprintf(page, "%llu\n", val);
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_hw_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
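
For reference, the bug being fixed here is that the shift was done in 32-bit arithmetic before the value was widened, so large discard limits wrapped around to a small number in sysfs. A self-contained illustration, assuming max_hw_discard_sectors is an unsigned int as in struct queue_limits (the value below is made up):

	#include <stdio.h>

	int main(void)
	{
		unsigned int max_hw_discard_sectors = 1u << 23;   /* 8M sectors = 4 GiB */

		unsigned long long wrong = max_hw_discard_sectors << 9;  /* 32-bit shift wraps to 0 */
		unsigned long long right = (unsigned long long)max_hw_discard_sectors << 9;

		printf("wrong: %llu\nright: %llu\n", wrong, right);
		return 0;
	}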

+ 0 - 3
block/deadline-iosched.c

@@ -39,7 +39,6 @@ struct deadline_data {
 	 */
 	struct request *next_rq[2];
 	unsigned int batching;		/* number of sequential requests made */
-	sector_t last_sector;		/* head position */
 	unsigned int starved;		/* times reads have starved writes */
 
 	/*
@@ -210,8 +209,6 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
 	dd->next_rq[WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	dd->last_sector = rq_end_sector(rq);
-
 	/*
 	 * take it off the sort and fifo list, move
 	 * to dispatch queue

+ 37 - 30
drivers/block/floppy.c

@@ -866,7 +866,7 @@ static void set_fdc(int drive)
 }
 
 /* locks the driver */
-static int lock_fdc(int drive, bool interruptible)
+static int lock_fdc(int drive)
 {
 	if (WARN(atomic_read(&usage_count) == 0,
 		 "Trying to lock fdc while usage count=0\n"))
@@ -2173,7 +2173,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
 {
 	int ret;
 
-	if (lock_fdc(drive, true))
+	if (lock_fdc(drive))
 		return -EINTR;
 
 	set_floppy(drive);
@@ -2960,7 +2960,7 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
 {
 	int ret;
 
-	if (lock_fdc(drive, interruptible))
+	if (lock_fdc(drive))
 		return -EINTR;
 
 	if (arg == FD_RESET_ALWAYS)
@@ -3243,7 +3243,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		mutex_lock(&open_lock);
-		if (lock_fdc(drive, true)) {
+		if (lock_fdc(drive)) {
 			mutex_unlock(&open_lock);
 			return -EINTR;
 		}
@@ -3263,7 +3263,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
 	} else {
 		int oldStretch;
 
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (cmd != FDDEFPRM) {
 			/* notice a disk change immediately, else
@@ -3349,7 +3349,7 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
 	if (type)
 		*g = &floppy_type[type];
 	else {
-		if (lock_fdc(drive, false))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(false, 0) == -EINTR)
 			return -EINTR;
@@ -3433,7 +3433,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		if (UDRS->fd_ref != 1)
 			/* somebody else has this drive open */
 			return -EBUSY;
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 
 		/* do the actual eject. Fails on
@@ -3445,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		process_fd_request();
 		return ret;
 	case FDCLRPRM:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		current_type[drive] = NULL;
 		floppy_sizes[drive] = MAX_DISK_SIZE << 1;
@@ -3467,7 +3467,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		UDP->flags &= ~FTD_MSG;
 		return 0;
 	case FDFMTBEG:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
 			return -EINTR;
@@ -3484,7 +3484,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		return do_format(drive, &inparam.f);
 	case FDFMTEND:
 	case FDFLUSH:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		return invalidate_drive(bdev);
 	case FDSETEMSGTRESH:
@@ -3507,7 +3507,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		outparam = UDP;
 		break;
 	case FDPOLLDRVSTAT:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
 			return -EINTR;
@@ -3530,7 +3530,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 	case FDRAWCMD:
 		if (type)
 			return -EINVAL;
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		set_floppy(drive);
 		i = raw_cmd_ioctl(cmd, (void __user *)param);
@@ -3539,7 +3539,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
 		process_fd_request();
 		return i;
 	case FDTWADDLE:
-		if (lock_fdc(drive, true))
+		if (lock_fdc(drive))
 			return -EINTR;
 		twaddle();
 		process_fd_request();
@@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
 
 	opened_bdev[drive] = bdev;
 
+	if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+		res = -EINVAL;
+		goto out;
+	}
+
 	res = -ENXIO;
 
 	if (!floppy_track_buffer) {
@@ -3706,21 +3711,20 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
 	if (UFDCS->rawcmd == 1)
 		UFDCS->rawcmd = 2;
 
-	if (!(mode & FMODE_NDELAY)) {
-		if (mode & (FMODE_READ|FMODE_WRITE)) {
-			UDRS->last_checked = 0;
-			clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-			check_disk_change(bdev);
-			if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-				goto out;
-			if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
-				goto out;
-		}
-		res = -EROFS;
-		if ((mode & FMODE_WRITE) &&
-		    !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-			goto out;
-	}
+	UDRS->last_checked = 0;
+	clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+	check_disk_change(bdev);
+	if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+		goto out;
+	if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+		goto out;
+
+	res = -EROFS;
+
+	if ((mode & FMODE_WRITE) &&
+			!test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+		goto out;
+
 	mutex_unlock(&open_lock);
 	mutex_unlock(&floppy_mutex);
 	return 0;
@@ -3748,7 +3752,8 @@ static unsigned int floppy_check_events(struct gendisk *disk,
 		return DISK_EVENT_MEDIA_CHANGE;
 
 	if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
-		lock_fdc(drive, false);
+		if (lock_fdc(drive))
+			return -EINTR;
 		poll_drive(false, 0);
 		process_fd_request();
 	}
@@ -3847,7 +3852,9 @@ static int floppy_revalidate(struct gendisk *disk)
 			 "VFS: revalidate called on non-open device.\n"))
 			return -EFAULT;
 
-		lock_fdc(drive, false);
+		res = lock_fdc(drive);
+		if (res)
+			return res;
 		cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
 		      test_bit(FD_VERIFY_BIT, &UDRS->flags));
 		if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {

+ 5 - 3
drivers/block/null_blk.c

@@ -478,7 +478,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
 	id->ver_id = 0x1;
 	id->vmnt = 0;
 	id->cgrps = 1;
-	id->cap = 0x3;
+	id->cap = 0x2;
 	id->dom = 0x1;
 
 	id->ppaf.blk_offset = 0;
@@ -707,9 +707,7 @@ static int null_add_dev(void)
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
-
 	mutex_lock(&lock);
-	list_add_tail(&nullb->list, &nullb_list);
 	nullb->index = nullb_indexes++;
 	mutex_unlock(&lock);
 
@@ -743,6 +741,10 @@ static int null_add_dev(void)
 	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
 	add_disk(disk);
+
+	mutex_lock(&lock);
+	list_add_tail(&nullb->list, &nullb_list);
+	mutex_unlock(&lock);
 done:
 	return 0;
 

+ 45 - 29
drivers/block/xen-blkfront.c

@@ -1873,6 +1873,43 @@ again:
 	return err;
 }
 
+static int negotiate_mq(struct blkfront_info *info)
+{
+	unsigned int backend_max_queues = 0;
+	int err;
+	unsigned int i;
+
+	BUG_ON(info->nr_rings);
+
+	/* Check if backend supports multiple queues. */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &backend_max_queues);
+	if (err < 0)
+		backend_max_queues = 1;
+
+	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+	/* We need at least one ring. */
+	if (!info->nr_rings)
+		info->nr_rings = 1;
+
+	info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+	if (!info->rinfo) {
+		xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < info->nr_rings; i++) {
+		struct blkfront_ring_info *rinfo;
+
+		rinfo = &info->rinfo[i];
+		INIT_LIST_HEAD(&rinfo->indirect_pages);
+		INIT_LIST_HEAD(&rinfo->grants);
+		rinfo->dev_info = info;
+		INIT_WORK(&rinfo->work, blkif_restart_queue);
+		spin_lock_init(&rinfo->ring_lock);
+	}
+	return 0;
+}
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -1883,9 +1920,7 @@ static int blkfront_probe(struct xenbus_device *dev,
 			  const struct xenbus_device_id *id)
 {
 	int err, vdevice;
-	unsigned int r_index;
 	struct blkfront_info *info;
-	unsigned int backend_max_queues = 0;
 
 	/* FIXME: Use dynamic device id if this is not set. */
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
@@ -1936,33 +1971,10 @@ static int blkfront_probe(struct xenbus_device *dev,
 	}
 
 	info->xbdev = dev;
-	/* Check if backend supports multiple queues. */
-	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-			   "multi-queue-max-queues", "%u", &backend_max_queues);
-	if (err < 0)
-		backend_max_queues = 1;
-
-	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
-	/* We need at least one ring. */
-	if (!info->nr_rings)
-		info->nr_rings = 1;
-
-	info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
-	if (!info->rinfo) {
-		xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+	err = negotiate_mq(info);
+	if (err) {
 		kfree(info);
-		return -ENOMEM;
-	}
-
-	for (r_index = 0; r_index < info->nr_rings; r_index++) {
-		struct blkfront_ring_info *rinfo;
-
-		rinfo = &info->rinfo[r_index];
-		INIT_LIST_HEAD(&rinfo->indirect_pages);
-		INIT_LIST_HEAD(&rinfo->grants);
-		rinfo->dev_info = info;
-		INIT_WORK(&rinfo->work, blkif_restart_queue);
-		spin_lock_init(&rinfo->ring_lock);
+		return err;
 	}
 
 	mutex_init(&info->mutex);
@@ -2123,12 +2135,16 @@ static int blkif_recover(struct blkfront_info *info)
 static int blkfront_resume(struct xenbus_device *dev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-	int err;
+	int err = 0;
 
 	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
+	err = negotiate_mq(info);
+	if (err)
+		return err;
+
 	err = talk_to_blkback(dev, info);
 
 	/*

+ 16 - 9
drivers/lightnvm/core.c

@@ -572,11 +572,13 @@ int nvm_register(struct request_queue *q, char *disk_name,
 		}
 	}
 
-	ret = nvm_get_sysblock(dev, &dev->sb);
-	if (!ret)
-		pr_err("nvm: device not initialized.\n");
-	else if (ret < 0)
-		pr_err("nvm: err (%d) on device initialization\n", ret);
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+		ret = nvm_get_sysblock(dev, &dev->sb);
+		if (!ret)
+			pr_err("nvm: device not initialized.\n");
+		else if (ret < 0)
+			pr_err("nvm: err (%d) on device initialization\n", ret);
+	}
 
 	/* register device with a supported media manager */
 	down_write(&nvm_lock);
@@ -1055,9 +1057,11 @@ static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
 	strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
 	info.fs_ppa.ppa = -1;
 
-	ret = nvm_init_sysblock(dev, &info);
-	if (ret)
-		return ret;
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
+		ret = nvm_init_sysblock(dev, &info);
+		if (ret)
+			return ret;
+	}
 
 	memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
 
@@ -1117,7 +1121,10 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
 		dev->mt = NULL;
 	}
 
-	return nvm_dev_factory(dev, fact.flags);
+	if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
+		return nvm_dev_factory(dev, fact.flags);
+
+	return 0;
 }
 
 static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)

+ 3 - 1
drivers/lightnvm/rrpc.c

@@ -300,8 +300,10 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 	}
 
 	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
-	if (!page)
+	if (!page) {
+		bio_put(bio);
 		return -ENOMEM;
+	}
 
 	while ((slot = find_first_zero_bit(rblk->invalid_pages,
 					    nr_pgs_per_blk)) < nr_pgs_per_blk) {

+ 3 - 2
drivers/lightnvm/rrpc.h

@@ -174,8 +174,7 @@ static inline sector_t rrpc_get_sector(sector_t laddr)
 static inline int request_intersects(struct rrpc_inflight_rq *r,
 				sector_t laddr_start, sector_t laddr_end)
 {
-	return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
-		(laddr_start >= r->l_start && laddr_start <= r->l_end);
+	return (laddr_end >= r->l_start) && (laddr_start <= r->l_end);
 }
 
 static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
@@ -184,6 +183,8 @@ static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
 	sector_t laddr_end = laddr + pages - 1;
 	struct rrpc_inflight_rq *rtmp;
 
+	WARN_ON(irqs_disabled());
+
 	spin_lock_irq(&rrpc->inflights.lock);
 	list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) {
 		if (unlikely(request_intersects(rtmp, laddr, laddr_end))) {
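
The old predicate only reported an intersection when the new request fell entirely inside an inflight one, so partially overlapping ranges were not locked against each other. A standalone check of the corrected overlap test, with helpers modeled on (but not identical to) the kernel code:

	#include <assert.h>

	typedef unsigned long long sector_t;

	/* old test: only true when [start, end] is fully contained */
	static int old_intersects(sector_t r_start, sector_t r_end,
				  sector_t start, sector_t end)
	{
		return (end >= r_start && end <= r_end) &&
		       (start >= r_start && start <= r_end);
	}

	/* fixed test: true for any overlap of the two ranges */
	static int new_intersects(sector_t r_start, sector_t r_end,
				  sector_t start, sector_t end)
	{
		return end >= r_start && start <= r_end;
	}

	int main(void)
	{
		/* inflight request covers sectors 10..20 */
		assert(!old_intersects(10, 20, 5, 15));  /* partial overlap was missed */
		assert(new_intersects(10, 20, 5, 15));   /* now detected */
		assert(!new_intersects(10, 20, 21, 30)); /* disjoint still rejected */
		return 0;
	}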

+ 3 - 2
drivers/nvme/host/Kconfig

@@ -17,5 +17,6 @@ config BLK_DEV_NVME_SCSI
 	  and block devices nodes, as well a a translation for a small
 	  number of selected SCSI commands to NVMe commands to the NVMe
 	  driver.  If you don't know what this means you probably want
-	  to say N here, and if you know what it means you probably
-	  want to say N as well.
+	  to say N here, unless you run a distro that abuses the SCSI
+	  emulation to provide stable device names for mount by id, like
+	  some OpenSuSE and SLES versions.

+ 0 - 1
drivers/nvme/host/core.c

@@ -1121,7 +1121,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	ns->queue = blk_mq_init_queue(ctrl->tagset);
 	if (IS_ERR(ns->queue))
 		goto out_free_ns;
-	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
 	ns->queue->queuedata = ns;
 	ns->ctrl = ctrl;

+ 9 - 3
drivers/nvme/host/lightnvm.c

@@ -146,9 +146,10 @@ struct nvme_nvm_command {
 	};
 };
 
+#define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__u16			num_pairs;
-	__u8			pairs[886];
+	__u8			pairs[NVME_NVM_LP_MLC_PAIRS];
 };
 
 struct nvme_nvm_lp_tbl {
@@ -282,9 +283,14 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
 			memcpy(dst->lptbl.id, src->lptbl.id, 8);
 			dst->lptbl.mlc.num_pairs =
 					le16_to_cpu(src->lptbl.mlc.num_pairs);
-			/* 4 bits per pair */
+
+			if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
+				pr_err("nvm: number of MLC pairs not supported\n");
+				return -EINVAL;
+			}
+
 			memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
-						dst->lptbl.mlc.num_pairs >> 1);
+						dst->lptbl.mlc.num_pairs);
 		}
 	}
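
The copy above lands in a fixed 886-byte array, so a device-reported pair count larger than that has to be rejected before the memcpy. A standalone model of the added bounds check (the oversized count is a made-up example, not a real device value):

	#include <stdio.h>
	#include <string.h>

	#define NVME_NVM_LP_MLC_PAIRS 886

	int main(void)
	{
		unsigned char src[1024] = { 0 };
		unsigned char dst[NVME_NVM_LP_MLC_PAIRS];
		unsigned int num_pairs = 1024;          /* larger than the destination */

		if (num_pairs > NVME_NVM_LP_MLC_PAIRS) {
			fprintf(stderr, "number of MLC pairs not supported\n");
			return 1;
		}
		memcpy(dst, src, num_pairs);            /* only reached when it fits */
		return 0;
	}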
 

+ 2 - 2
drivers/nvme/host/nvme.h

@@ -139,9 +139,9 @@ static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl)
 	u32 val = 0;
 
 	if (ctrl->ops->io_incapable(ctrl))
-		return false;
+		return true;
 	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
-		return false;
+		return true;
 	return val & NVME_CSTS_CFS;
 }
 

+ 7 - 6
drivers/nvme/host/pci.c

@@ -678,6 +678,11 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (unlikely(nvmeq->cq_vector < 0)) {
+		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		spin_unlock_irq(&nvmeq->q_lock);
+		goto out;
+	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -999,7 +1004,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
 	if (!blk_mq_request_started(req))
 		return;
 
-	dev_warn(nvmeq->q_dmadev,
+	dev_dbg_ratelimited(nvmeq->q_dmadev,
 		 "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
 	status = NVME_SC_ABORT_REQ;
@@ -2111,16 +2116,12 @@ static void nvme_remove(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-	spin_lock(&dev_list_lock);
-	list_del_init(&dev->node);
-	spin_unlock(&dev_list_lock);
-
 	pci_set_drvdata(pdev, NULL);
-	flush_work(&dev->reset_work);
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, true);
+	flush_work(&dev->reset_work);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);

+ 1 - 1
fs/direct-io.c

@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 		dio->io_error = -EIO;
 
 	if (dio->is_async && dio->rw == READ && dio->should_dirty) {
-		bio_check_pages_dirty(bio);	/* transfers ownership */
 		err = bio->bi_error;
+		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		bio_for_each_segment_all(bvec, bio, i) {
 			struct page *page = bvec->bv_page;
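
bio_check_pages_dirty() can drop the last reference to the bio, so bi_error must be read before ownership is handed over, not after. A toy userspace model of that ordering rule, using malloc/free to stand in for the bio lifetime:

	#include <stdio.h>
	#include <stdlib.h>

	struct fake_bio {
		int bi_error;
	};

	/* stands in for bio_check_pages_dirty(): may free the object */
	static void transfer_ownership(struct fake_bio *bio)
	{
		free(bio);
	}

	int main(void)
	{
		struct fake_bio *bio = malloc(sizeof(*bio));
		int err;

		if (!bio)
			return 1;
		bio->bi_error = -5;          /* -EIO */

		err = bio->bi_error;         /* read first ... */
		transfer_ownership(bio);     /* ... then give the bio away */

		printf("err=%d\n", err);
		return 0;
	}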

+ 11 - 4
fs/fs-writeback.c

@@ -317,6 +317,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	struct inode_switch_wbs_context *isw =
 		container_of(work, struct inode_switch_wbs_context, work);
 	struct inode *inode = isw->inode;
+	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
 	struct bdi_writeback *new_wb = isw->new_wb;
@@ -423,6 +424,7 @@ skip_switch:
 	wb_put(new_wb);
 
 	iput(inode);
+	deactivate_super(sb);
 	kfree(isw);
 }
 
@@ -469,11 +471,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
+
 	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
-	    inode_to_wb(inode) == isw->new_wb) {
-		spin_unlock(&inode->i_lock);
-		goto out_free;
-	}
+	    inode_to_wb(inode) == isw->new_wb)
+		goto out_unlock;
+
+	if (!atomic_inc_not_zero(&inode->i_sb->s_active))
+		goto out_unlock;
+
 	inode->i_state |= I_WB_SWITCH;
 	spin_unlock(&inode->i_lock);
 
@@ -489,6 +494,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
 	return;
 
+out_unlock:
+	spin_unlock(&inode->i_lock);
 out_free:
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
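
Here the superblock is pinned with atomic_inc_not_zero(&inode->i_sb->s_active) before the switch work is queued, and released with deactivate_super() once the work has run, so the sb cannot disappear while the association switch is in flight. A standalone sketch of that pin-if-still-alive pattern, using C11 atomics in place of the kernel primitives:

	#include <stdatomic.h>
	#include <stdio.h>

	/* userspace stand-in for atomic_inc_not_zero() */
	static int inc_not_zero(atomic_int *count)
	{
		int old = atomic_load(count);

		while (old != 0) {
			if (atomic_compare_exchange_weak(count, &old, old + 1))
				return 1;
		}
		return 0;                        /* already being torn down, don't pin */
	}

	int main(void)
	{
		atomic_int s_active;

		atomic_init(&s_active, 1);       /* superblock still active */

		if (!inc_not_zero(&s_active)) {
			puts("sb dying, skip the wb switch");
			return 0;
		}

		/* ... the queued switch work runs here ... */

		atomic_fetch_sub(&s_active, 1);  /* deactivate_super() counterpart */
		printf("s_active back to %d\n", atomic_load(&s_active));
		return 0;
	}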

+ 6 - 0
fs/inode.c

@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_rdev = 0;
 	inode->dirtied_when = 0;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+	inode->i_wb_frn_winner = 0;
+	inode->i_wb_frn_avg_time = 0;
+	inode->i_wb_frn_history = 0;
+#endif
+
 	if (security_inode_alloc(inode))
 		goto out;
 	spin_lock_init(&inode->i_lock);

+ 4 - 0
include/linux/lightnvm.h

@@ -135,6 +135,10 @@ enum {
 	/* Memory types */
 	NVM_ID_FMTYPE_SLC	= 0,
 	NVM_ID_FMTYPE_MLC	= 1,
+
+	/* Device capabilities */
+	NVM_ID_DCAP_BBLKMGMT	= 0x1,
+	NVM_UD_DCAP_ECC		= 0x2,
 };
 
 struct nvm_id_lp_mlc {