
Merge branch 'for-4.11/block' into for-4.11/rq-refactor

Signed-off-by: Jens Axboe <axboe@fb.com>
Jens Axboe, 8 years ago (commit f924ba70c1)

+ 2 - 2
MAINTAINERS

@@ -8604,10 +8604,10 @@ S:	Maintained
 F:	drivers/net/ethernet/netronome/
 
 NETWORK BLOCK DEVICE (NBD)
-M:	Markus Pargmann <mpa@pengutronix.de>
+M:	Josef Bacik <jbacik@fb.com>
 S:	Maintained
+L:	linux-block@vger.kernel.org
 L:	nbd-general@lists.sourceforge.net
-T:	git git://git.pengutronix.de/git/mpa/linux-nbd.git
 F:	Documentation/blockdev/nbd.txt
 F:	drivers/block/nbd.c
 F:	include/uapi/linux/nbd.h

+ 12 - 0
block/Kconfig

@@ -147,6 +147,18 @@ config BLK_WBT_MQ
 	Multiqueue currently doesn't have support for IO scheduling,
 	enabling this option is recommended.
 
+config BLK_DEBUG_FS
+	bool "Block layer debugging information in debugfs"
+	default y
+	depends on DEBUG_FS
+	---help---
+	Include block layer debugging information in debugfs. This information
+	is mostly useful for kernel developers, but it doesn't incur any cost
+	at runtime.
+
+	Unless you are building a kernel for a tiny system, you should
+	say Y here.
+
 menu "Partition Types"
 
 source "block/partitions/Kconfig"

+ 50 - 0
block/Kconfig.iosched

@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
 	default "cfq" if DEFAULT_CFQ
 	default "noop" if DEFAULT_NOOP
 
+config MQ_IOSCHED_DEADLINE
+	tristate "MQ deadline I/O scheduler"
+	default y
+	---help---
+	  MQ version of the deadline IO scheduler.
+
+config MQ_IOSCHED_NONE
+	bool
+	default y
+
+choice
+	prompt "Default single-queue blk-mq I/O scheduler"
+	default DEFAULT_SQ_NONE
+	help
+	  Select the I/O scheduler which will be used by default for blk-mq
+	  managed block devices with a single queue.
+
+	config DEFAULT_SQ_DEADLINE
+		bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
+
+	config DEFAULT_SQ_NONE
+		bool "None"
+
+endchoice
+
+config DEFAULT_SQ_IOSCHED
+	string
+	default "mq-deadline" if DEFAULT_SQ_DEADLINE
+	default "none" if DEFAULT_SQ_NONE
+
+choice
+	prompt "Default multi-queue blk-mq I/O scheduler"
+	default DEFAULT_MQ_NONE
+	help
+	  Select the I/O scheduler which will be used by default for blk-mq
+	  managed block devices with multiple queues.
+
+	config DEFAULT_MQ_DEADLINE
+		bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
+
+	config DEFAULT_MQ_NONE
+		bool "None"
+
+endchoice
+
+config DEFAULT_MQ_IOSCHED
+	string
+	default "mq-deadline" if DEFAULT_MQ_DEADLINE
+	default "none" if DEFAULT_MQ_NONE
+
 endmenu
 
 endif
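
Note: each `string` config symbol above becomes a quoted macro in <generated/autoconf.h>, so the elevator core can look up the selected default by name. A hypothetical helper showing how the two strings are consumed (the real fallback lives in elevator_init(), which is not part of this diff):

	static const char *blk_mq_default_sched(struct request_queue *q)
	{
		/*
		 * CONFIG_DEFAULT_SQ_IOSCHED / CONFIG_DEFAULT_MQ_IOSCHED expand
		 * to "mq-deadline" or "none", depending on the choices above.
		 */
		if (q->nr_hw_queues == 1)
			return CONFIG_DEFAULT_SQ_IOSCHED;
		return CONFIG_DEFAULT_MQ_IOSCHED;
	}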

+ 3 - 1
block/Makefile

@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
-			blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
+			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
 			badblocks.o partitions/
 
@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
+obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
+obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o

+ 18 - 4
block/blk-cgroup.c

@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q,
 	if (blkcg_policy_enabled(q, pol))
 		return 0;
 
-	blk_queue_bypass_start(q);
+	if (q->mq_ops)
+		blk_mq_freeze_queue(q);
+	else
+		blk_queue_bypass_start(q);
 pd_prealloc:
 	if (!pd_prealloc) {
 		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1261,7 +1264,10 @@ pd_prealloc:
 
 	spin_unlock_irq(q->queue_lock);
 out_bypass_end:
-	blk_queue_bypass_end(q);
+	if (q->mq_ops)
+		blk_mq_unfreeze_queue(q);
+	else
+		blk_queue_bypass_end(q);
 	if (pd_prealloc)
 		pol->pd_free_fn(pd_prealloc);
 	return ret;
@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
 	if (!blkcg_policy_enabled(q, pol))
 		return;
 
-	blk_queue_bypass_start(q);
+	if (q->mq_ops)
+		blk_mq_freeze_queue(q);
+	else
+		blk_queue_bypass_start(q);
+
 	spin_lock_irq(q->queue_lock);
 
 	__clear_bit(pol->plid, q->blkcg_pols);
@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	blk_queue_bypass_end(q);
+
+	if (q->mq_ops)
+		blk_mq_unfreeze_queue(q);
+	else
+		blk_queue_bypass_end(q);
 }
 EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
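
Note: the same freeze-versus-bypass branch now appears four times in this file. The idiom is "quiesce the queue in whichever way its dispatch model supports": blk-mq queues are drained by freezing, legacy queues by entering bypass mode. A hypothetical pair of helpers makes the pattern explicit (this commit leaves the branches open-coded):

	static void blkcg_quiesce_queue(struct request_queue *q)
	{
		if (q->mq_ops)				/* blk-mq: drain by freezing */
			blk_mq_freeze_queue(q);
		else					/* legacy: enter bypass mode */
			blk_queue_bypass_start(q);
	}

	static void blkcg_unquiesce_queue(struct request_queue *q)
	{
		if (q->mq_ops)
			blk_mq_unfreeze_queue(q);
		else
			blk_queue_bypass_end(q);
	}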
 

+ 8 - 22
block/blk-core.c

@@ -39,6 +39,7 @@
 
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-mq-sched.h"
 #include "blk-wbt.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->cmd = rq->__cmd;
 	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
+	rq->internal_tag = -1;
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
 	rq->part = NULL;
@@ -1033,28 +1035,12 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
 	 * Flush requests do not use the elevator so skip initialization.
 	 * This allows a request to share the flush and elevator data.
 	 */
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
+	if (op_is_flush(bio->bi_opf))
 		return false;
 
 	return true;
 }
 
-/**
- * rq_ioc - determine io_context for request allocation
- * @bio: request being allocated is for this bio (can be %NULL)
- *
- * Determine io_context to use for request allocation for @bio.  May return
- * %NULL if %current->io_context doesn't exist.
- */
-static struct io_context *rq_ioc(struct bio *bio)
-{
-#ifdef CONFIG_BLK_CGROUP
-	if (bio && bio->bi_ioc)
-		return bio->bi_ioc;
-#endif
-	return current->io_context;
-}
-
 /**
  * __get_request - get a free request
  * @rl: request list to allocate from
@@ -1655,7 +1641,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
+	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
@@ -1894,7 +1880,7 @@ generic_make_request_checks(struct bio *bio)
 	 * drivers without flush support don't have to worry
 	 * about them.
 	 */
-	if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
+	if (op_is_flush(bio->bi_opf) &&
 	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
 		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 		if (!nr_sectors) {
@@ -2143,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
-		blk_mq_insert_request(rq, false, true, false);
+		blk_mq_sched_insert_request(rq, false, true, false, false);
 		return 0;
 	}
 
@@ -2159,7 +2145,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	 */
 	BUG_ON(blk_queued_rq(rq));
 
-	if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
+	if (op_is_flush(rq->cmd_flags))
 		where = ELEVATOR_INSERT_FLUSH;
 
 	add_acct_request(q, rq, where);
@@ -3270,7 +3256,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
-		if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
+		if (op_is_flush(rq->cmd_flags))
 			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
 		else
 			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
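
Note: all of the open-coded `(REQ_PREFLUSH | REQ_FUA)` tests above collapse into op_is_flush(), added to include/linux/blk_types.h elsewhere in this series. The helper is essentially:

	static inline bool op_is_flush(unsigned int op)
	{
		return op & (REQ_FUA | REQ_PREFLUSH);
	}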

+ 2 - 1
block/blk-exec.c

@@ -9,6 +9,7 @@
 #include <linux/sched/sysctl.h>
 
 #include "blk.h"
+#include "blk-mq-sched.h"
 
 /*
  * for max sense size
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_insert_request(rq, at_head, true, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false, false);
 		return;
 	}
 

+ 7 - 5
block/blk-flush.c

@@ -74,6 +74,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
 
 /* FLUSH/FUA sequences */
 enum {
@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	 * the comment in flush_end_io().
 	 */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
-	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
-		blk_mq_run_hw_queue(hctx, true);
+	blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
 	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /**
@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		if (q->mq_ops) {
-			blk_mq_insert_request(rq, false, true, false);
-		} else
+		if (q->mq_ops)
+			blk_mq_sched_insert_request(rq, false, true, false, false);
+		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
 	}

+ 8 - 4
block/blk-ioc.c

@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq)
 	if (icq->flags & ICQ_EXITED)
 		return;
 
-	if (et->ops.elevator_exit_icq_fn)
-		et->ops.elevator_exit_icq_fn(icq);
+	if (et->uses_mq && et->ops.mq.exit_icq)
+		et->ops.mq.exit_icq(icq);
+	else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
+		et->ops.sq.elevator_exit_icq_fn(icq);
 
 	icq->flags |= ICQ_EXITED;
 }
@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
 	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
 		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
 		list_add(&icq->q_node, &q->icq_list);
-		if (et->ops.elevator_init_icq_fn)
-			et->ops.elevator_init_icq_fn(icq);
+		if (et->uses_mq && et->ops.mq.init_icq)
+			et->ops.mq.init_icq(icq);
+		else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
+			et->ops.sq.elevator_init_icq_fn(icq);
 	} else {
 		kmem_cache_free(et->icq_cache, icq);
 		icq = ioc_lookup_icq(ioc, q);
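
Note: the `uses_mq` checks above dispatch through the split elevator ops introduced by this series. Legacy and blk-mq hooks now live in a union, so exactly one set is valid per elevator type. Roughly:

	struct elevator_type {
		/* ... */
		union {
			struct elevator_ops sq;		/* legacy request_fn path */
			struct elevator_mq_ops mq;	/* blk-mq path */
		} ops;
		bool uses_mq;	/* selects which union member is valid */
		/* ... */
	};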

+ 2 - 2
block/blk-merge.c

@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_allow_rq_merge_fn)
-		if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
+	if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
+		if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
 			return 0;
 
 	return attempt_merge(q, rq, next);

+ 756 - 0
block/blk-mq-debugfs.c

@@ -0,0 +1,756 @@
+/*
+ * Copyright (C) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+
+struct blk_mq_debugfs_attr {
+	const char *name;
+	umode_t mode;
+	const struct file_operations *fops;
+};
+
+static struct dentry *block_debugfs_root;
+
+static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
+				   const struct seq_operations *ops)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, ops);
+	if (!ret) {
+		m = file->private_data;
+		m->private = inode->i_private;
+	}
+	return ret;
+}
+
+static int hctx_state_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "0x%lx\n", hctx->state);
+	return 0;
+}
+
+static int hctx_state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_state_show, inode->i_private);
+}
+
+static const struct file_operations hctx_state_fops = {
+	.open		= hctx_state_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_flags_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "0x%lx\n", hctx->flags);
+	return 0;
+}
+
+static int hctx_flags_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_flags_show, inode->i_private);
+}
+
+static const struct file_operations hctx_flags_fops = {
+	.open		= hctx_flags_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
+{
+	struct request *rq = list_entry_rq(v);
+
+	seq_printf(m, "%p {.cmd_type=%u, .cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
+		   rq, rq->cmd_type, rq->cmd_flags, (unsigned int)rq->rq_flags,
+		   rq->tag, rq->internal_tag);
+	return 0;
+}
+
+static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	spin_lock(&hctx->lock);
+	return seq_list_start(&hctx->dispatch, *pos);
+}
+
+static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	return seq_list_next(v, &hctx->dispatch, pos);
+}
+
+static void hctx_dispatch_stop(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	spin_unlock(&hctx->lock);
+}
+
+static const struct seq_operations hctx_dispatch_seq_ops = {
+	.start	= hctx_dispatch_start,
+	.next	= hctx_dispatch_next,
+	.stop	= hctx_dispatch_stop,
+	.show	= blk_mq_debugfs_rq_show,
+};
+
+static int hctx_dispatch_open(struct inode *inode, struct file *file)
+{
+	return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops);
+}
+
+static const struct file_operations hctx_dispatch_fops = {
+	.open		= hctx_dispatch_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int hctx_ctx_map_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	sbitmap_bitmap_show(&hctx->ctx_map, m);
+	return 0;
+}
+
+static int hctx_ctx_map_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_ctx_map_show, inode->i_private);
+}
+
+static const struct file_operations hctx_ctx_map_fops = {
+	.open		= hctx_ctx_map_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void blk_mq_debugfs_tags_show(struct seq_file *m,
+				     struct blk_mq_tags *tags)
+{
+	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
+	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
+	seq_printf(m, "active_queues=%d\n",
+		   atomic_read(&tags->active_queues));
+
+	seq_puts(m, "\nbitmap_tags:\n");
+	sbitmap_queue_show(&tags->bitmap_tags, m);
+
+	if (tags->nr_reserved_tags) {
+		seq_puts(m, "\nbreserved_tags:\n");
+		sbitmap_queue_show(&tags->breserved_tags, m);
+	}
+}
+
+static int hctx_tags_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct request_queue *q = hctx->queue;
+
+	mutex_lock(&q->sysfs_lock);
+	if (hctx->tags)
+		blk_mq_debugfs_tags_show(m, hctx->tags);
+	mutex_unlock(&q->sysfs_lock);
+
+	return 0;
+}
+
+static int hctx_tags_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_tags_show, inode->i_private);
+}
+
+static const struct file_operations hctx_tags_fops = {
+	.open		= hctx_tags_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_tags_bitmap_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct request_queue *q = hctx->queue;
+
+	mutex_lock(&q->sysfs_lock);
+	if (hctx->tags)
+		sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
+	mutex_unlock(&q->sysfs_lock);
+	return 0;
+}
+
+static int hctx_tags_bitmap_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_tags_bitmap_show, inode->i_private);
+}
+
+static const struct file_operations hctx_tags_bitmap_fops = {
+	.open		= hctx_tags_bitmap_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_sched_tags_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct request_queue *q = hctx->queue;
+
+	mutex_lock(&q->sysfs_lock);
+	if (hctx->sched_tags)
+		blk_mq_debugfs_tags_show(m, hctx->sched_tags);
+	mutex_unlock(&q->sysfs_lock);
+
+	return 0;
+}
+
+static int hctx_sched_tags_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_sched_tags_show, inode->i_private);
+}
+
+static const struct file_operations hctx_sched_tags_fops = {
+	.open		= hctx_sched_tags_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct request_queue *q = hctx->queue;
+
+	mutex_lock(&q->sysfs_lock);
+	if (hctx->sched_tags)
+		sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
+	mutex_unlock(&q->sysfs_lock);
+	return 0;
+}
+
+static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private);
+}
+
+static const struct file_operations hctx_sched_tags_bitmap_fops = {
+	.open		= hctx_sched_tags_bitmap_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_io_poll_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "considered=%lu\n", hctx->poll_considered);
+	seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
+	seq_printf(m, "success=%lu\n", hctx->poll_success);
+	return 0;
+}
+
+static int hctx_io_poll_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_io_poll_show, inode->i_private);
+}
+
+static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
+	return count;
+}
+
+static const struct file_operations hctx_io_poll_fops = {
+	.open		= hctx_io_poll_open,
+	.read		= seq_read,
+	.write		= hctx_io_poll_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
+{
+	seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
+		   stat->nr_samples, stat->mean, stat->min, stat->max);
+}
+
+static int hctx_stats_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct blk_rq_stat stat[2];
+
+	blk_stat_init(&stat[BLK_STAT_READ]);
+	blk_stat_init(&stat[BLK_STAT_WRITE]);
+
+	blk_hctx_stat_get(hctx, stat);
+
+	seq_puts(m, "read: ");
+	print_stat(m, &stat[BLK_STAT_READ]);
+	seq_puts(m, "\n");
+
+	seq_puts(m, "write: ");
+	print_stat(m, &stat[BLK_STAT_WRITE]);
+	seq_puts(m, "\n");
+	return 0;
+}
+
+static int hctx_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_stats_show, inode->i_private);
+}
+
+static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_hw_ctx *hctx = m->private;
+	struct blk_mq_ctx *ctx;
+	int i;
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		blk_stat_init(&ctx->stat[BLK_STAT_READ]);
+		blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
+	}
+	return count;
+}
+
+static const struct file_operations hctx_stats_fops = {
+	.open		= hctx_stats_open,
+	.read		= seq_read,
+	.write		= hctx_stats_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_dispatched_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+	int i;
+
+	seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
+
+	for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
+		unsigned int d = 1U << (i - 1);
+
+		seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
+	}
+
+	seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
+	return 0;
+}
+
+static int hctx_dispatched_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_dispatched_show, inode->i_private);
+}
+
+static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf,
+				     size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_hw_ctx *hctx = m->private;
+	int i;
+
+	for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
+		hctx->dispatched[i] = 0;
+	return count;
+}
+
+static const struct file_operations hctx_dispatched_fops = {
+	.open		= hctx_dispatched_open,
+	.read		= seq_read,
+	.write		= hctx_dispatched_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_queued_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "%lu\n", hctx->queued);
+	return 0;
+}
+
+static int hctx_queued_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_queued_show, inode->i_private);
+}
+
+static ssize_t hctx_queued_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	hctx->queued = 0;
+	return count;
+}
+
+static const struct file_operations hctx_queued_fops = {
+	.open		= hctx_queued_open,
+	.read		= seq_read,
+	.write		= hctx_queued_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_run_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "%lu\n", hctx->run);
+	return 0;
+}
+
+static int hctx_run_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_run_show, inode->i_private);
+}
+
+static ssize_t hctx_run_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	hctx->run = 0;
+	return count;
+}
+
+static const struct file_operations hctx_run_fops = {
+	.open		= hctx_run_open,
+	.read		= seq_read,
+	.write		= hctx_run_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int hctx_active_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_hw_ctx *hctx = m->private;
+
+	seq_printf(m, "%d\n", atomic_read(&hctx->nr_active));
+	return 0;
+}
+
+static int hctx_active_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hctx_active_show, inode->i_private);
+}
+
+static const struct file_operations hctx_active_fops = {
+	.open		= hctx_active_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	spin_lock(&ctx->lock);
+	return seq_list_start(&ctx->rq_list, *pos);
+}
+
+static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	return seq_list_next(v, &ctx->rq_list, pos);
+}
+
+static void ctx_rq_list_stop(struct seq_file *m, void *v)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	spin_unlock(&ctx->lock);
+}
+
+static const struct seq_operations ctx_rq_list_seq_ops = {
+	.start	= ctx_rq_list_start,
+	.next	= ctx_rq_list_next,
+	.stop	= ctx_rq_list_stop,
+	.show	= blk_mq_debugfs_rq_show,
+};
+
+static int ctx_rq_list_open(struct inode *inode, struct file *file)
+{
+	return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops);
+}
+
+static const struct file_operations ctx_rq_list_fops = {
+	.open		= ctx_rq_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int ctx_dispatched_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]);
+	return 0;
+}
+
+static int ctx_dispatched_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ctx_dispatched_show, inode->i_private);
+}
+
+static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf,
+				    size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_ctx *ctx = m->private;
+
+	ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0;
+	return count;
+}
+
+static const struct file_operations ctx_dispatched_fops = {
+	.open		= ctx_dispatched_open,
+	.read		= seq_read,
+	.write		= ctx_dispatched_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int ctx_merged_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	seq_printf(m, "%lu\n", ctx->rq_merged);
+	return 0;
+}
+
+static int ctx_merged_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ctx_merged_show, inode->i_private);
+}
+
+static ssize_t ctx_merged_write(struct file *file, const char __user *buf,
+				    size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_ctx *ctx = m->private;
+
+	ctx->rq_merged = 0;
+	return count;
+}
+
+static const struct file_operations ctx_merged_fops = {
+	.open		= ctx_merged_open,
+	.read		= seq_read,
+	.write		= ctx_merged_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int ctx_completed_show(struct seq_file *m, void *v)
+{
+	struct blk_mq_ctx *ctx = m->private;
+
+	seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]);
+	return 0;
+}
+
+static int ctx_completed_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ctx_completed_show, inode->i_private);
+}
+
+static ssize_t ctx_completed_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct blk_mq_ctx *ctx = m->private;
+
+	ctx->rq_completed[0] = ctx->rq_completed[1] = 0;
+	return count;
+}
+
+static const struct file_operations ctx_completed_fops = {
+	.open		= ctx_completed_open,
+	.read		= seq_read,
+	.write		= ctx_completed_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
+	{"state", 0400, &hctx_state_fops},
+	{"flags", 0400, &hctx_flags_fops},
+	{"dispatch", 0400, &hctx_dispatch_fops},
+	{"ctx_map", 0400, &hctx_ctx_map_fops},
+	{"tags", 0400, &hctx_tags_fops},
+	{"tags_bitmap", 0400, &hctx_tags_bitmap_fops},
+	{"sched_tags", 0400, &hctx_sched_tags_fops},
+	{"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops},
+	{"io_poll", 0600, &hctx_io_poll_fops},
+	{"stats", 0600, &hctx_stats_fops},
+	{"dispatched", 0600, &hctx_dispatched_fops},
+	{"queued", 0600, &hctx_queued_fops},
+	{"run", 0600, &hctx_run_fops},
+	{"active", 0400, &hctx_active_fops},
+};
+
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
+	{"rq_list", 0400, &ctx_rq_list_fops},
+	{"dispatched", 0600, &ctx_dispatched_fops},
+	{"merged", 0600, &ctx_merged_fops},
+	{"completed", 0600, &ctx_completed_fops},
+};
+
+int blk_mq_debugfs_register(struct request_queue *q, const char *name)
+{
+	if (!block_debugfs_root)
+		return -ENOENT;
+
+	q->debugfs_dir = debugfs_create_dir(name, block_debugfs_root);
+	if (!q->debugfs_dir)
+		goto err;
+
+	if (blk_mq_debugfs_register_hctxs(q))
+		goto err;
+
+	return 0;
+
+err:
+	blk_mq_debugfs_unregister(q);
+	return -ENOMEM;
+}
+
+void blk_mq_debugfs_unregister(struct request_queue *q)
+{
+	debugfs_remove_recursive(q->debugfs_dir);
+	q->mq_debugfs_dir = NULL;
+	q->debugfs_dir = NULL;
+}
+
+static int blk_mq_debugfs_register_ctx(struct request_queue *q,
+				       struct blk_mq_ctx *ctx,
+				       struct dentry *hctx_dir)
+{
+	struct dentry *ctx_dir;
+	char name[20];
+	int i;
+
+	snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
+	ctx_dir = debugfs_create_dir(name, hctx_dir);
+	if (!ctx_dir)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_ctx_attrs); i++) {
+		const struct blk_mq_debugfs_attr *attr;
+
+		attr = &blk_mq_debugfs_ctx_attrs[i];
+		if (!debugfs_create_file(attr->name, attr->mode, ctx_dir, ctx,
+					 attr->fops))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int blk_mq_debugfs_register_hctx(struct request_queue *q,
+					struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_ctx *ctx;
+	struct dentry *hctx_dir;
+	char name[20];
+	int i;
+
+	snprintf(name, sizeof(name), "%u", hctx->queue_num);
+	hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir);
+	if (!hctx_dir)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_hctx_attrs); i++) {
+		const struct blk_mq_debugfs_attr *attr;
+
+		attr = &blk_mq_debugfs_hctx_attrs[i];
+		if (!debugfs_create_file(attr->name, attr->mode, hctx_dir, hctx,
+					 attr->fops))
+			return -ENOMEM;
+	}
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	if (!q->debugfs_dir)
+		return -ENOENT;
+
+	q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir);
+	if (!q->mq_debugfs_dir)
+		goto err;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (blk_mq_debugfs_register_hctx(q, hctx))
+			goto err;
+	}
+
+	return 0;
+
+err:
+	blk_mq_debugfs_unregister_hctxs(q);
+	return -ENOMEM;
+}
+
+void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+{
+	debugfs_remove_recursive(q->mq_debugfs_dir);
+	q->mq_debugfs_dir = NULL;
+}
+
+void blk_mq_debugfs_init(void)
+{
+	block_debugfs_root = debugfs_create_dir("block", NULL);
+}
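
Note: adding another per-hctx file follows directly from the tables above: write a show callback, wrap it with single_open(), and append an entry to blk_mq_debugfs_hctx_attrs[]. A hypothetical example exposing the hctx NUMA node:

	static int hctx_numa_node_show(struct seq_file *m, void *v)
	{
		struct blk_mq_hw_ctx *hctx = m->private;

		seq_printf(m, "%u\n", hctx->numa_node);
		return 0;
	}

	static int hctx_numa_node_open(struct inode *inode, struct file *file)
	{
		return single_open(file, hctx_numa_node_show, inode->i_private);
	}

	static const struct file_operations hctx_numa_node_fops = {
		.open		= hctx_numa_node_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,
	};

	/*
	 * ...then add {"numa_node", 0400, &hctx_numa_node_fops} to
	 * blk_mq_debugfs_hctx_attrs[].
	 */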

+ 481 - 0
block/blk-mq-sched.c

@@ -0,0 +1,481 @@
+/*
+ * blk-mq scheduling framework
+ *
+ * Copyright (C) 2016 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blk-mq.h>
+
+#include <trace/events/block.h>
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-sched.h"
+#include "blk-mq-tag.h"
+#include "blk-wbt.h"
+
+void blk_mq_sched_free_hctx_data(struct request_queue *q,
+				 void (*exit)(struct blk_mq_hw_ctx *))
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (exit && hctx->sched_data)
+			exit(hctx);
+		kfree(hctx->sched_data);
+		hctx->sched_data = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
+
+int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
+				int (*init)(struct blk_mq_hw_ctx *),
+				void (*exit)(struct blk_mq_hw_ctx *))
+{
+	struct blk_mq_hw_ctx *hctx;
+	int ret;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
+		if (!hctx->sched_data) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (init) {
+			ret = init(hctx);
+			if (ret) {
+				/*
+				 * We don't want to give exit() a partially
+				 * initialized sched_data. init() must clean up
+				 * if it fails.
+				 */
+				kfree(hctx->sched_data);
+				hctx->sched_data = NULL;
+				goto error;
+			}
+		}
+	}
+
+	return 0;
+error:
+	blk_mq_sched_free_hctx_data(q, exit);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
+
+static void __blk_mq_sched_assign_ioc(struct request_queue *q,
+				      struct request *rq, struct io_context *ioc)
+{
+	struct io_cq *icq;
+
+	spin_lock_irq(q->queue_lock);
+	icq = ioc_lookup_icq(ioc, q);
+	spin_unlock_irq(q->queue_lock);
+
+	if (!icq) {
+		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
+		if (!icq)
+			return;
+	}
+
+	rq->elv.icq = icq;
+	if (!blk_mq_sched_get_rq_priv(q, rq)) {
+		rq->rq_flags |= RQF_ELVPRIV;
+		get_io_context(icq->ioc);
+		return;
+	}
+
+	rq->elv.icq = NULL;
+}
+
+static void blk_mq_sched_assign_ioc(struct request_queue *q,
+				    struct request *rq, struct bio *bio)
+{
+	struct io_context *ioc;
+
+	ioc = rq_ioc(bio);
+	if (ioc)
+		__blk_mq_sched_assign_ioc(q, rq, ioc);
+}
+
+struct request *blk_mq_sched_get_request(struct request_queue *q,
+					 struct bio *bio,
+					 unsigned int op,
+					 struct blk_mq_alloc_data *data)
+{
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	struct request *rq;
+
+	blk_queue_enter_live(q);
+	ctx = blk_mq_get_ctx(q);
+	hctx = blk_mq_map_queue(q, ctx->cpu);
+
+	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
+
+	if (e) {
+		data->flags |= BLK_MQ_REQ_INTERNAL;
+
+		/*
+		 * Flush requests are special and go directly to the
+		 * dispatch list.
+		 */
+		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
+			rq = e->type->ops.mq.get_request(q, op, data);
+			if (rq)
+				rq->rq_flags |= RQF_QUEUED;
+		} else
+			rq = __blk_mq_alloc_request(data, op);
+	} else {
+		rq = __blk_mq_alloc_request(data, op);
+		if (rq)
+			data->hctx->tags->rqs[rq->tag] = rq;
+	}
+
+	if (rq) {
+		if (!op_is_flush(op)) {
+			rq->elv.icq = NULL;
+			if (e && e->type->icq_cache)
+				blk_mq_sched_assign_ioc(q, rq, bio);
+		}
+		data->hctx->queued++;
+		return rq;
+	}
+
+	blk_queue_exit(q);
+	return NULL;
+}
+
+void blk_mq_sched_put_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+
+	if (rq->rq_flags & RQF_ELVPRIV) {
+		blk_mq_sched_put_rq_priv(rq->q, rq);
+		if (rq->elv.icq) {
+			put_io_context(rq->elv.icq->ioc);
+			rq->elv.icq = NULL;
+		}
+	}
+
+	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
+		e->type->ops.mq.put_request(rq);
+	else
+		blk_mq_finish_request(rq);
+}
+
+void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+{
+	struct elevator_queue *e = hctx->queue->elevator;
+	LIST_HEAD(rq_list);
+
+	if (unlikely(blk_mq_hctx_stopped(hctx)))
+		return;
+
+	hctx->run++;
+
+	/*
+	 * If we have previous entries on our dispatch list, grab them first for
+	 * more fair dispatch.
+	 */
+	if (!list_empty_careful(&hctx->dispatch)) {
+		spin_lock(&hctx->lock);
+		if (!list_empty(&hctx->dispatch))
+			list_splice_init(&hctx->dispatch, &rq_list);
+		spin_unlock(&hctx->lock);
+	}
+
+	/*
+	 * Only ask the scheduler for requests, if we didn't have residual
+	 * requests from the dispatch list. This is to avoid the case where
+	 * we only ever dispatch a fraction of the requests available because
+	 * of low device queue depth. Once we pull requests out of the IO
+	 * scheduler, we can no longer merge or sort them. So it's best to
+	 * leave them there for as long as we can. Mark the hw queue as
+	 * needing a restart in that case.
+	 */
+	if (!list_empty(&rq_list)) {
+		blk_mq_sched_mark_restart(hctx);
+		blk_mq_dispatch_rq_list(hctx, &rq_list);
+	} else if (!e || !e->type->ops.mq.dispatch_request) {
+		blk_mq_flush_busy_ctxs(hctx, &rq_list);
+		blk_mq_dispatch_rq_list(hctx, &rq_list);
+	} else {
+		do {
+			struct request *rq;
+
+			rq = e->type->ops.mq.dispatch_request(hctx);
+			if (!rq)
+				break;
+			list_add(&rq->queuelist, &rq_list);
+		} while (blk_mq_dispatch_rq_list(hctx, &rq_list));
+	}
+}
+
+void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
+				   struct list_head *rq_list,
+				   struct request *(*get_rq)(struct blk_mq_hw_ctx *))
+{
+	do {
+		struct request *rq;
+
+		rq = get_rq(hctx);
+		if (!rq)
+			break;
+
+		list_add_tail(&rq->queuelist, rq_list);
+	} while (1);
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
+
+bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
+{
+	struct request *rq;
+	int ret;
+
+	ret = elv_merge(q, &rq, bio);
+	if (ret == ELEVATOR_BACK_MERGE) {
+		if (!blk_mq_sched_allow_merge(q, rq, bio))
+			return false;
+		if (bio_attempt_back_merge(q, rq, bio)) {
+			if (!attempt_back_merge(q, rq))
+				elv_merged_request(q, rq, ret);
+			return true;
+		}
+	} else if (ret == ELEVATOR_FRONT_MERGE) {
+		if (!blk_mq_sched_allow_merge(q, rq, bio))
+			return false;
+		if (bio_attempt_front_merge(q, rq, bio)) {
+			if (!attempt_front_merge(q, rq))
+				elv_merged_request(q, rq, ret);
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
+
+bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e->type->ops.mq.bio_merge) {
+		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+
+		blk_mq_put_ctx(ctx);
+		return e->type->ops.mq.bio_merge(hctx, bio);
+	}
+
+	return false;
+}
+
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
+{
+	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
+
+void blk_mq_sched_request_inserted(struct request *rq)
+{
+	trace_block_rq_insert(rq->q, rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
+
+bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	if (rq->tag == -1) {
+		rq->rq_flags |= RQF_SORTED;
+		return false;
+	}
+
+	/*
+	 * If we already have a real request tag, send directly to
+	 * the dispatch list.
+	 */
+	spin_lock(&hctx->lock);
+	list_add(&rq->queuelist, &hctx->dispatch);
+	spin_unlock(&hctx->lock);
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
+
+static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		if (blk_mq_hctx_has_pending(hctx))
+			blk_mq_run_hw_queue(hctx, true);
+	}
+}
+
+void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
+{
+	unsigned int i;
+
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		blk_mq_sched_restart_hctx(hctx);
+	else {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+			return;
+
+		clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			blk_mq_sched_restart_hctx(hctx);
+	}
+}
+
+/*
+ * Add flush/fua to the queue. If we fail getting a driver tag, then
+ * punt to the requeue list. Requeue will re-invoke us from a context
+ * that's safe to block from.
+ */
+static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
+				      struct request *rq, bool can_block)
+{
+	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
+		blk_insert_flush(rq);
+		blk_mq_run_hw_queue(hctx, true);
+	} else
+		blk_mq_add_to_requeue_list(rq, true, true);
+}
+
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+
+	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
+		blk_mq_sched_insert_flush(hctx, rq, can_block);
+		return;
+	}
+
+	if (e && e->type->ops.mq.insert_requests) {
+		LIST_HEAD(list);
+
+		list_add(&rq->queuelist, &list);
+		e->type->ops.mq.insert_requests(hctx, &list, at_head);
+	} else {
+		spin_lock(&ctx->lock);
+		__blk_mq_insert_request(hctx, rq, at_head);
+		spin_unlock(&ctx->lock);
+	}
+
+	if (run_queue)
+		blk_mq_run_hw_queue(hctx, async);
+}
+
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async)
+{
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.insert_requests)
+		e->type->ops.mq.insert_requests(hctx, list, false);
+	else
+		blk_mq_insert_requests(hctx, ctx, list);
+
+	blk_mq_run_hw_queue(hctx, run_queue_async);
+}
+
+static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+				   struct blk_mq_hw_ctx *hctx,
+				   unsigned int hctx_idx)
+{
+	if (hctx->sched_tags) {
+		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
+		blk_mq_free_rq_map(hctx->sched_tags);
+		hctx->sched_tags = NULL;
+	}
+}
+
+int blk_mq_sched_setup(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	int ret, i;
+
+	/*
+	 * Default to 256, since we don't split into sync/async like the
+	 * old code did. Additionally, this is a per-hw queue depth.
+	 */
+	q->nr_requests = 2 * BLKDEV_MAX_RQ;
+
+	/*
+	 * We're switching to using an IO scheduler, so setup the hctx
+	 * scheduler tags and switch the request map from the regular
+	 * tags to scheduler tags. First allocate what we need, so we
+	 * can safely fail and fallback, if needed.
+	 */
+	ret = 0;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
+		if (!hctx->sched_tags) {
+			ret = -ENOMEM;
+			break;
+		}
+		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
+		if (ret)
+			break;
+	}
+
+	/*
+	 * If we failed, free what we did allocate
+	 */
+	if (ret) {
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (!hctx->sched_tags)
+				continue;
+			blk_mq_sched_free_tags(set, hctx, i);
+		}
+
+		return ret;
+	}
+
+	return 0;
+}
+
+void blk_mq_sched_teardown(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_sched_free_tags(set, hctx, i);
+}
+
+int blk_mq_sched_init(struct request_queue *q)
+{
+	int ret;
+
+#if defined(CONFIG_DEFAULT_SQ_NONE)
+	if (q->nr_hw_queues == 1)
+		return 0;
+#endif
+#if defined(CONFIG_DEFAULT_MQ_NONE)
+	if (q->nr_hw_queues > 1)
+		return 0;
+#endif
+
+	mutex_lock(&q->sysfs_lock);
+	ret = elevator_init(q, NULL);
+	mutex_unlock(&q->sysfs_lock);
+
+	return ret;
+}
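
Note: a scheduler built on this framework allocates its per-hctx state through blk_mq_sched_init_hctx_data(). The init() callback runs on already-allocated sched_data and must clean up after itself on failure; exit() is only invoked for hctxs that were fully initialized. A hypothetical sketch (names are illustrative, not part of this commit):

	struct example_hctx_data {
		spinlock_t lock;
		struct list_head rqs;
	};

	static int example_init_hctx(struct blk_mq_hw_ctx *hctx)
	{
		struct example_hctx_data *d = hctx->sched_data;

		spin_lock_init(&d->lock);
		INIT_LIST_HEAD(&d->rqs);
		return 0;
	}

	static int example_init_sched(struct request_queue *q)
	{
		return blk_mq_sched_init_hctx_data(q,
					sizeof(struct example_hctx_data),
					example_init_hctx, NULL);
	}

	/* Teardown mirrors setup: blk_mq_sched_free_hctx_data(q, NULL); */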

+ 142 - 0
block/blk-mq-sched.h

@@ -0,0 +1,142 @@
+#ifndef BLK_MQ_SCHED_H
+#define BLK_MQ_SCHED_H
+
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+
+int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
+				int (*init)(struct blk_mq_hw_ctx *),
+				void (*exit)(struct blk_mq_hw_ctx *));
+
+void blk_mq_sched_free_hctx_data(struct request_queue *q,
+				 void (*exit)(struct blk_mq_hw_ctx *));
+
+struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
+void blk_mq_sched_put_request(struct request *rq);
+
+void blk_mq_sched_request_inserted(struct request *rq);
+bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
+bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
+bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
+void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
+
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block);
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async);
+
+void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
+void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
+			struct list_head *rq_list,
+			struct request *(*get_rq)(struct blk_mq_hw_ctx *));
+
+int blk_mq_sched_setup(struct request_queue *q);
+void blk_mq_sched_teardown(struct request_queue *q);
+
+int blk_mq_sched_init(struct request_queue *q);
+
+static inline bool
+blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+		return false;
+
+	return __blk_mq_sched_bio_merge(q, bio);
+}
+
+static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
+					   struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->type->ops.mq.get_rq_priv)
+		return e->type->ops.mq.get_rq_priv(q, rq);
+
+	return 0;
+}
+
+static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
+					    struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->type->ops.mq.put_rq_priv)
+		e->type->ops.mq.put_rq_priv(q, rq);
+}
+
+static inline bool
+blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
+			 struct bio *bio)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->type->ops.mq.allow_merge)
+		return e->type->ops.mq.allow_merge(q, rq, bio);
+
+	return true;
+}
+
+static inline void
+blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.completed_request)
+		e->type->ops.mq.completed_request(hctx, rq);
+
+	BUG_ON(rq->internal_tag == -1);
+
+	blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
+}
+
+static inline void blk_mq_sched_started_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->type->ops.mq.started_request)
+		e->type->ops.mq.started_request(rq);
+}
+
+static inline void blk_mq_sched_requeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->type->ops.mq.requeue_request)
+		e->type->ops.mq.requeue_request(rq);
+}
+
+static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
+{
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.has_work)
+		return e->type->ops.mq.has_work(hctx);
+
+	return false;
+}
+
+static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+			struct request_queue *q = hctx->queue;
+
+			if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+				set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
+		}
+	}
+}
+
+static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
+{
+	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+#endif
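
Note: mark_restart/needs_restart/restart_queues implement a lost-wakeup guard: dispatch marks the hctx before leaving requests behind, and the completion path re-runs any marked hctx (all hctxs of the queue for shared-tag devices, via QUEUE_FLAG_RESTART). The handshake, reduced to its skeleton:

	/* Dispatch side (see blk_mq_sched_dispatch_requests above): */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart(hctx);	/* may need a kick later */
		blk_mq_dispatch_rq_list(hctx, &rq_list);
	}

	/* Completion side (presumably wired into request freeing in blk-mq.c): */
	blk_mq_sched_restart_queues(hctx);		/* re-runs marked hctx(s) */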

+ 21 - 214
block/blk-mq-sysfs.c

@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
 	return res;
 }
 
-static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page)
-{
-	return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
-				ctx->rq_dispatched[0]);
-}
-
-static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
-{
-	return sprintf(page, "%lu\n", ctx->rq_merged);
-}
-
-static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
-{
-	return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
-				ctx->rq_completed[0]);
-}
-
-static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
-{
-	struct request *rq;
-	int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
-
-	list_for_each_entry(rq, list, queuelist) {
-		const int rq_len = 2 * sizeof(rq) + 2;
-
-		/* if the output will be truncated */
-		if (PAGE_SIZE - 1 < len + rq_len) {
-			/* backspacing if it can't hold '\t...\n' */
-			if (PAGE_SIZE - 1 < len + 5)
-				len -= rq_len;
-			len += snprintf(page + len, PAGE_SIZE - 1 - len,
-					"\t...\n");
-			break;
-		}
-		len += snprintf(page + len, PAGE_SIZE - 1 - len,
-				"\t%p\n", rq);
-	}
-
-	return len;
-}
-
-static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
-{
-	ssize_t ret;
-
-	spin_lock(&ctx->lock);
-	ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
-	spin_unlock(&ctx->lock);
-
-	return ret;
-}
-
-static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n",
-		       hctx->poll_considered, hctx->poll_invoked,
-		       hctx->poll_success);
-}
-
-static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx,
-					  const char *page, size_t size)
-{
-	hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
-
-	return size;
-}
-
-static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
-					   char *page)
-{
-	return sprintf(page, "%lu\n", hctx->queued);
-}
-
-static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	return sprintf(page, "%lu\n", hctx->run);
-}
-
-static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
-					       char *page)
-{
-	char *start_page = page;
-	int i;
-
-	page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
-
-	for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
-		unsigned int d = 1U << (i - 1);
-
-		page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]);
-	}
-
-	page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1),
-						hctx->dispatched[i]);
-	return page - start_page;
-}
-
-static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
+static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
 					    char *page)
 {
-	ssize_t ret;
-
-	spin_lock(&hctx->lock);
-	ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
-	spin_unlock(&hctx->lock);
-
-	return ret;
+	return sprintf(page, "%u\n", hctx->tags->nr_tags);
 }
 
-static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
+static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
+						     char *page)
 {
-	return blk_mq_tag_sysfs_show(hctx->tags, page);
-}
-
-static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
+	return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
 }
 
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return ret;
 }
 
-static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
-{
-	struct blk_mq_ctx *ctx;
-	unsigned int i;
-
-	hctx_for_each_ctx(hctx, ctx, i) {
-		blk_stat_init(&ctx->stat[BLK_STAT_READ]);
-		blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
-	}
-}
-
-static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
-					  const char *page, size_t count)
-{
-	blk_mq_stat_clear(hctx);
-	return count;
-}
-
-static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
-{
-	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
-			pre, (long long) stat->nr_samples,
-			(long long) stat->mean, (long long) stat->min,
-			(long long) stat->max);
-}
-
-static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	struct blk_rq_stat stat[2];
-	ssize_t ret;
-
-	blk_stat_init(&stat[BLK_STAT_READ]);
-	blk_stat_init(&stat[BLK_STAT_WRITE]);
-
-	blk_hctx_stat_get(hctx, stat);
-
-	ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
-	ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
-	return ret;
-}
-
-static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
-	.attr = {.name = "dispatched", .mode = S_IRUGO },
-	.show = blk_mq_sysfs_dispatched_show,
-};
-static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
-	.attr = {.name = "merged", .mode = S_IRUGO },
-	.show = blk_mq_sysfs_merged_show,
-};
-static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
-	.attr = {.name = "completed", .mode = S_IRUGO },
-	.show = blk_mq_sysfs_completed_show,
-};
-static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
-	.attr = {.name = "rq_list", .mode = S_IRUGO },
-	.show = blk_mq_sysfs_rq_list_show,
-};
-
 static struct attribute *default_ctx_attrs[] = {
-	&blk_mq_sysfs_dispatched.attr,
-	&blk_mq_sysfs_merged.attr,
-	&blk_mq_sysfs_completed.attr,
-	&blk_mq_sysfs_rq_list.attr,
 	NULL,
 };
 
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = {
-	.attr = {.name = "queued", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_queued_show,
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
+	.attr = {.name = "nr_tags", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_nr_tags_show,
 };
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = {
-	.attr = {.name = "run", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_run_show,
-};
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
-	.attr = {.name = "dispatched", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_dispatched_show,
-};
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
-	.attr = {.name = "active", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_active_show,
-};
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
-	.attr = {.name = "pending", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_rq_list_show,
-};
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
-	.attr = {.name = "tags", .mode = S_IRUGO },
-	.show = blk_mq_hw_sysfs_tags_show,
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
+	.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
 };
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
 	.attr = {.name = "cpu_list", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_cpus_show,
 };
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
-	.attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO },
-	.show = blk_mq_hw_sysfs_poll_show,
-	.store = blk_mq_hw_sysfs_poll_store,
-};
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
-	.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
-	.show = blk_mq_hw_sysfs_stat_show,
-	.store = blk_mq_hw_sysfs_stat_store,
-};
 
 static struct attribute *default_hw_ctx_attrs[] = {
-	&blk_mq_hw_sysfs_queued.attr,
-	&blk_mq_hw_sysfs_run.attr,
-	&blk_mq_hw_sysfs_dispatched.attr,
-	&blk_mq_hw_sysfs_pending.attr,
-	&blk_mq_hw_sysfs_tags.attr,
+	&blk_mq_hw_sysfs_nr_tags.attr,
+	&blk_mq_hw_sysfs_nr_reserved_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
-	&blk_mq_hw_sysfs_active.attr,
-	&blk_mq_hw_sysfs_poll.attr,
-	&blk_mq_hw_sysfs_stat.attr,
 	NULL,
 };
 
@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 		kobject_put(&hctx->kobj);
 	}
 
+	blk_mq_debugfs_unregister(q);
+
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
 	kobject_put(&q->mq_kobj);
@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
 
 	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
+	blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 	if (!q->mq_sysfs_init_done)
 		return;
 
+	blk_mq_debugfs_unregister_hctxs(q);
+
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 }
@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	if (!q->mq_sysfs_init_done)
 		return ret;
 
+	blk_mq_debugfs_register_hctxs(q);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)

+ 92 - 98
block/blk-mq-tag.c

@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
+static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
+			    struct sbitmap_queue *bt)
 {
-	if (!hctx_may_queue(hctx, bt))
+	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
+	    !hctx_may_queue(data->hctx, bt))
 		return -1;
 	return __sbitmap_queue_get(bt);
 }
 
-static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
-		  struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
+unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
+	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	struct sbitmap_queue *bt;
 	struct sbq_wait_state *ws;
 	DEFINE_WAIT(wait);
+	unsigned int tag_offset;
+	bool drop_ctx;
 	int tag;
 
-	tag = __bt_get(hctx, bt);
+	if (data->flags & BLK_MQ_REQ_RESERVED) {
+		if (unlikely(!tags->nr_reserved_tags)) {
+			WARN_ON_ONCE(1);
+			return BLK_MQ_TAG_FAIL;
+		}
+		bt = &tags->breserved_tags;
+		tag_offset = 0;
+	} else {
+		bt = &tags->bitmap_tags;
+		tag_offset = tags->nr_reserved_tags;
+	}
+
+	tag = __blk_mq_get_tag(data, bt);
 	if (tag != -1)
-		return tag;
+		goto found_tag;
 
 	if (data->flags & BLK_MQ_REQ_NOWAIT)
-		return -1;
+		return BLK_MQ_TAG_FAIL;
 
-	ws = bt_wait_ptr(bt, hctx);
+	ws = bt_wait_ptr(bt, data->hctx);
+	drop_ctx = data->ctx == NULL;
 	do {
 		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(hctx, bt);
+		tag = __blk_mq_get_tag(data, bt);
 		if (tag != -1)
 			break;
 
 		/*
 		 * We're out of tags on this hardware queue, kick any
 		 * pending IO submits before going to sleep waiting for
-		 * some to complete. Note that hctx can be NULL here for
-		 * reserved tag allocation.
+		 * some to complete.
 		 */
-		if (hctx)
-			blk_mq_run_hw_queue(hctx, false);
+		blk_mq_run_hw_queue(data->hctx, false);
 
 		/*
 		 * Retry tag allocation after running the hardware queue,
 		 * as running the queue may also have found completions.
 		 */
-		tag = __bt_get(hctx, bt);
+		tag = __blk_mq_get_tag(data, bt);
 		if (tag != -1)
 			break;
 
-		blk_mq_put_ctx(data->ctx);
+		if (data->ctx)
+			blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
 
 		data->ctx = blk_mq_get_ctx(data->q);
 		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
-		if (data->flags & BLK_MQ_REQ_RESERVED) {
-			bt = &data->hctx->tags->breserved_tags;
-		} else {
-			hctx = data->hctx;
-			bt = &hctx->tags->bitmap_tags;
-		}
+		tags = blk_mq_tags_from_data(data);
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			bt = &tags->breserved_tags;
+		else
+			bt = &tags->bitmap_tags;
+
 		finish_wait(&ws->wait, &wait);
-		ws = bt_wait_ptr(bt, hctx);
+		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
-	finish_wait(&ws->wait, &wait);
-	return tag;
-}
-
-static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
-{
-	int tag;
-
-	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
-		     data->hctx->tags);
-	if (tag >= 0)
-		return tag + data->hctx->tags->nr_reserved_tags;
-
-	return BLK_MQ_TAG_FAIL;
-}
-
-static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
-{
-	int tag;
-
-	if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
-		WARN_ON_ONCE(1);
-		return BLK_MQ_TAG_FAIL;
-	}
-
-	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
-		     data->hctx->tags);
-	if (tag < 0)
-		return BLK_MQ_TAG_FAIL;
+	if (drop_ctx && data->ctx)
+		blk_mq_put_ctx(data->ctx);
 
-	return tag;
-}
+	finish_wait(&ws->wait, &wait);
 
-unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
-{
-	if (data->flags & BLK_MQ_REQ_RESERVED)
-		return __blk_mq_get_reserved_tag(data);
-	return __blk_mq_get_tag(data);
+found_tag:
+	return tag + tag_offset;
 }
 
-void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-		    unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
+		    struct blk_mq_ctx *ctx, unsigned int tag)
 {
-	struct blk_mq_tags *tags = hctx->tags;
-
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
 		struct blk_mq_tags *tags = set->tags[i];
 
 		for (j = 0; j < tags->nr_tags; j++) {
-			if (!tags->rqs[j])
+			if (!tags->static_rqs[j])
 				continue;
 
 			ret = set->ops->reinit_request(set->driver_data,
-						tags->rqs[j]);
+						tags->static_rqs[j]);
 			if (ret)
 				goto out;
 		}
@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 
 }
 
-static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
-{
-	return bt->sb.depth - sbitmap_weight(&bt->sb);
-}
-
 static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
 		    bool round_robin, int node)
 {
@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
 	kfree(tags);
 }
 
-int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
+			    struct blk_mq_tags **tagsptr, unsigned int tdepth,
+			    bool can_grow)
 {
-	tdepth -= tags->nr_reserved_tags;
-	if (tdepth > tags->nr_tags)
+	struct blk_mq_tags *tags = *tagsptr;
+
+	if (tdepth <= tags->nr_reserved_tags)
 		return -EINVAL;
 
+	tdepth -= tags->nr_reserved_tags;
+
 	/*
-	 * Don't need (or can't) update reserved tags here, they remain
-	 * static and should never need resizing.
+	 * If we are allowed to grow beyond the original size, allocate
+	 * a new set of tags before freeing the old one.
 	 */
-	sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
+	if (tdepth > tags->nr_tags) {
+		struct blk_mq_tag_set *set = hctx->queue->tag_set;
+		struct blk_mq_tags *new;
+		bool ret;
+
+		if (!can_grow)
+			return -EINVAL;
+
+		/*
+		 * We need some sort of upper limit; set it high enough that
+		 * no valid use case should require more.
+		 */
+		if (tdepth > 16 * BLKDEV_MAX_RQ)
+			return -EINVAL;
+
+		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
+		if (!new)
+			return -ENOMEM;
+		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
+		if (ret) {
+			blk_mq_free_rq_map(new);
+			return -ENOMEM;
+		}
+
+		blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
+		blk_mq_free_rq_map(*tagsptr);
+		*tagsptr = new;
+	} else {
+		/*
+		 * Don't need (or can't) update reserved tags here, they
+		 * remain static and should never need resizing.
+		 */
+		sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
+	}
 
-	blk_mq_tag_wakeup_all(tags, false);
 	return 0;
 }
 
@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq)
 		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
 }
 EXPORT_SYMBOL(blk_mq_unique_tag);
-
-ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
-{
-	char *orig_page = page;
-	unsigned int free, res;
-
-	if (!tags)
-		return 0;
-
-	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
-			"bits_per_word=%u\n",
-			tags->nr_tags, tags->nr_reserved_tags,
-			1U << tags->bitmap_tags.sb.shift);
-
-	free = bt_unused_tags(&tags->bitmap_tags);
-	res = bt_unused_tags(&tags->breserved_tags);
-
-	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
-	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
-
-	return page - orig_page;
-}
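
The unified blk_mq_get_tag() above carves one tag space into a reserved region at the bottom and a normal region biased by tag_offset, so callers always see queue-wide tag numbers. A minimal userspace sketch of that split, with plain bool arrays standing in for sbitmap_queue and all names chosen for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NR_TAGS		8
#define NR_RESERVED	2

struct tag_pool {
	bool reserved_used[NR_RESERVED];	/* models breserved_tags */
	bool normal_used[NR_TAGS - NR_RESERVED];/* models bitmap_tags */
};

/* Reserved tags occupy [0, NR_RESERVED); normal tags are offset past them. */
static int pool_get_tag(struct tag_pool *p, bool reserved)
{
	bool *bits = reserved ? p->reserved_used : p->normal_used;
	int nr = reserved ? NR_RESERVED : NR_TAGS - NR_RESERVED;
	int tag_offset = reserved ? 0 : NR_RESERVED;

	for (int i = 0; i < nr; i++) {
		if (!bits[i]) {
			bits[i] = true;
			return i + tag_offset;
		}
	}
	return -1;	/* pool exhausted; the kernel would sleep and retry */
}

int main(void)
{
	struct tag_pool pool = { { false }, { false } };

	printf("reserved tag: %d\n", pool_get_tag(&pool, true));	/* 0 */
	printf("normal tag:   %d\n", pool_get_tag(&pool, false));	/* 2 */
	return 0;
}

blk_mq_put_tag() inverts the mapping: tags at or above nr_reserved_tags have the offset subtracted before the bit is cleared, matching the tag + tag_offset returned at found_tag.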

+ 6 - 4
block/blk-mq-tag.h

@@ -16,6 +16,7 @@ struct blk_mq_tags {
 	struct sbitmap_queue breserved_tags;
 
 	struct request **rqs;
+	struct request **static_rqs;
 	struct list_head page_list;
 };
 
@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
-extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-			   unsigned int tag);
+extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
+			   struct blk_mq_ctx *ctx, unsigned int tag);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
-extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
-extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
+					struct blk_mq_tags **tags,
+					unsigned int depth, bool can_grow);
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv);
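
The new static_rqs[] array splits ownership from lookup: it is indexed by the tag a request was allocated under (the scheduler tag when BLK_MQ_REQ_INTERNAL is set) and holds every preallocated request, while rqs[] is populated lazily at dispatch time and indexed by the driver tag. A hedged, simplified model; the struct and helper names below are illustrative, not kernel API:

#include <stdio.h>

struct req {
	int tag;		/* driver tag; -1 until dispatch */
	int internal_tag;	/* scheduler tag; set at allocation */
};

struct tags {
	struct req **rqs;	 /* dispatch-time lookup, by driver tag */
	struct req **static_rqs; /* owns the requests, by allocation tag */
};

static struct req *alloc_rq(struct tags *t, int sched_tag)
{
	struct req *rq = t->static_rqs[sched_tag];

	rq->internal_tag = sched_tag;
	rq->tag = -1;
	return rq;
}

/* Mirrors "data.hctx->tags->rqs[rq->tag] = rq" in blk_mq_get_driver_tag(). */
static void assign_driver_tag(struct tags *t, struct req *rq, int drv_tag)
{
	rq->tag = drv_tag;
	t->rqs[drv_tag] = rq;
}

int main(void)
{
	struct req storage[2] = { { -1, -1 }, { -1, -1 } };
	struct req *static_rqs[2] = { &storage[0], &storage[1] };
	struct req *rqs[2] = { NULL, NULL };
	struct tags t = { rqs, static_rqs };

	struct req *rq = alloc_rq(&t, 1);
	assign_driver_tag(&t, rq, 0);
	printf("driver tag %d -> sched tag %d\n", rq->tag, rq->internal_tag);
	return 0;
}

This split is also why blk_mq_reinit_tagset() and the request-freeing path now walk static_rqs[] rather than rqs[], which may be only sparsely populated at any instant.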

+ 360 - 198
block/blk-mq.c

@@ -32,6 +32,7 @@
 #include "blk-mq-tag.h"
 #include "blk-stat.h"
 #include "blk-wbt.h"
+#include "blk-mq-sched.h"
 
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
@@ -39,9 +40,11 @@ static LIST_HEAD(all_q_list);
 /*
  * Check if any of the ctx's have pending work in this hardware queue
  */
-static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
+bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
 {
-	return sbitmap_any_bit_set(&hctx->ctx_map);
+	return sbitmap_any_bit_set(&hctx->ctx_map) ||
+			!list_empty_careful(&hctx->dispatch) ||
+			blk_mq_sched_has_work(hctx);
 }
 
 /*
@@ -167,8 +170,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			       struct request *rq, unsigned int op)
+void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+			struct request *rq, unsigned int op)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* csd/requeue_work/fifo_time is initialized before use */
@@ -213,53 +216,58 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 
 	ctx->rq_dispatched[op_is_sync(op)]++;
 }
+EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op)
+struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+				       unsigned int op)
 {
 	struct request *rq;
 	unsigned int tag;
 
 	tag = blk_mq_get_tag(data);
 	if (tag != BLK_MQ_TAG_FAIL) {
-		rq = data->hctx->tags->rqs[tag];
+		struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 
-		if (blk_mq_tag_busy(data->hctx)) {
-			rq->rq_flags = RQF_MQ_INFLIGHT;
-			atomic_inc(&data->hctx->nr_active);
+		rq = tags->static_rqs[tag];
+
+		if (data->flags & BLK_MQ_REQ_INTERNAL) {
+			rq->tag = -1;
+			rq->internal_tag = tag;
+		} else {
+			if (blk_mq_tag_busy(data->hctx)) {
+				rq->rq_flags = RQF_MQ_INFLIGHT;
+				atomic_inc(&data->hctx->nr_active);
+			}
+			rq->tag = tag;
+			rq->internal_tag = -1;
 		}
 
-		rq->tag = tag;
 		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
 		return rq;
 	}
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		unsigned int flags)
 {
-	struct blk_mq_ctx *ctx;
-	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
-	struct blk_mq_alloc_data alloc_data;
 	int ret;
 
 	ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ctx = blk_mq_get_ctx(q);
-	hctx = blk_mq_map_queue(q, ctx->cpu);
-	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-	rq = __blk_mq_alloc_request(&alloc_data, rw);
-	blk_mq_put_ctx(ctx);
+	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
 
-	if (!rq) {
-		blk_queue_exit(q);
+	blk_mq_put_ctx(alloc_data.ctx);
+	blk_queue_exit(q);
+
+	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
-	}
 
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
@@ -319,10 +327,10 @@ out_queue_exit:
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
-				  struct blk_mq_ctx *ctx, struct request *rq)
+void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+			     struct request *rq)
 {
-	const int tag = rq->tag;
+	const int sched_tag = rq->internal_tag;
 	struct request_queue *q = rq->q;
 
 	if (rq->rq_flags & RQF_MQ_INFLIGHT)
@@ -333,23 +341,31 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
-	blk_mq_put_tag(hctx, ctx, tag);
+	if (rq->tag != -1)
+		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+	if (sched_tag != -1)
+		blk_mq_sched_completed_request(hctx, rq);
+	blk_mq_sched_restart_queues(hctx);
 	blk_queue_exit(q);
 }
 
-void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
+				     struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
 	ctx->rq_completed[rq_is_sync(rq)]++;
-	__blk_mq_free_request(hctx, ctx, rq);
+	__blk_mq_finish_request(hctx, ctx, rq);
+}
 
+void blk_mq_finish_request(struct request *rq)
+{
+	blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
 }
-EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
 
 void blk_mq_free_request(struct request *rq)
 {
-	blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
+	blk_mq_sched_put_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
@@ -467,6 +483,8 @@ void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
+	blk_mq_sched_started_request(rq);
+
 	trace_block_rq_issue(q, rq);
 
 	rq->resid_len = blk_rq_bytes(rq);
@@ -515,6 +533,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 
 	trace_block_rq_requeue(q, rq);
 	wbt_requeue(q->rq_wb, &rq->issue_stat);
+	blk_mq_sched_requeue_request(rq);
 
 	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
 		if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -549,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
 
 		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
-		blk_mq_insert_request(rq, true, false, false);
+		blk_mq_sched_insert_request(rq, true, false, false, true);
 	}
 
 	while (!list_empty(&rq_list)) {
 		rq = list_entry(rq_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_insert_request(rq, false, false, false);
+		blk_mq_sched_insert_request(rq, false, false, false, true);
 	}
 
 	blk_mq_run_hw_queues(q, false);
@@ -639,7 +658,7 @@ struct blk_mq_timeout_data {
 
 void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
-	struct blk_mq_ops *ops = req->q->mq_ops;
+	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
 
 	/*
@@ -763,6 +782,12 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
 			continue;
 
 		el_ret = blk_try_merge(rq, bio);
+		if (el_ret == ELEVATOR_NO_MERGE)
+			continue;
+
+		if (!blk_mq_sched_allow_merge(q, rq, bio))
+			break;
+
 		if (el_ret == ELEVATOR_BACK_MERGE) {
 			if (bio_attempt_back_merge(q, rq, bio)) {
 				ctx->rq_merged++;
@@ -803,7 +828,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
  * Process software queues that have been marked busy, splicing them
  * to the for-dispatch
  */
-static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
 	struct flush_busy_ctx_data data = {
 		.hctx = hctx,
@@ -812,6 +837,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 
 	sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
 }
+EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
 static inline unsigned int queued_to_index(unsigned int queued)
 {
@@ -821,6 +847,77 @@ static inline unsigned int queued_to_index(unsigned int queued)
 	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+			   bool wait)
+{
+	struct blk_mq_alloc_data data = {
+		.q = rq->q,
+		.hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
+		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
+	};
+
+	if (blk_mq_hctx_stopped(data.hctx))
+		return false;
+
+	if (rq->tag != -1) {
+done:
+		if (hctx)
+			*hctx = data.hctx;
+		return true;
+	}
+
+	rq->tag = blk_mq_get_tag(&data);
+	if (rq->tag >= 0) {
+		if (blk_mq_tag_busy(data.hctx)) {
+			rq->rq_flags |= RQF_MQ_INFLIGHT;
+			atomic_inc(&data.hctx->nr_active);
+		}
+		data.hctx->tags->rqs[rq->tag] = rq;
+		goto done;
+	}
+
+	return false;
+}
+
+static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+				  struct request *rq)
+{
+	if (rq->tag == -1 || rq->internal_tag == -1)
+		return;
+
+	blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+	rq->tag = -1;
+
+	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
+		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
+		atomic_dec(&hctx->nr_active);
+	}
+}
+
+/*
+ * If we fail getting a driver tag because all the driver tags are already
+ * assigned and on the dispatch list, BUT the first entry does not have a
+ * tag, then we could deadlock. For that case, move entries with assigned
+ * driver tags to the front, leaving the set of tagged requests in the
+ * same order, and the untagged set in the same order.
+ */
+static bool reorder_tags_to_front(struct list_head *list)
+{
+	struct request *rq, *tmp, *first = NULL;
+
+	list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
+		if (rq == first)
+			break;
+		if (rq->tag != -1) {
+			list_move(&rq->queuelist, list);
+			if (!first)
+				first = rq;
+		}
+	}
+
+	return first != NULL;
+}
+
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
 	struct request_queue *q = hctx->queue;
@@ -843,6 +940,20 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 		struct blk_mq_queue_data bd;
 
 		rq = list_first_entry(list, struct request, queuelist);
+		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+			if (!queued && reorder_tags_to_front(list))
+				continue;
+
+			/*
+			 * We failed getting a driver tag. Mark the queue(s)
+			 * as needing a restart, then retry the tag allocation
+			 * in case the needed IO completed right before we
+			 * marked the queue as needing a restart.
+			 */
+			blk_mq_sched_mark_restart(hctx);
+			if (!blk_mq_get_driver_tag(rq, &hctx, false))
+				break;
+		}
 		list_del_init(&rq->queuelist);
 
 		bd.rq = rq;
@@ -855,6 +966,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 			queued++;
 			break;
 		case BLK_MQ_RQ_QUEUE_BUSY:
+			blk_mq_put_driver_tag(hctx, rq);
 			list_add(&rq->queuelist, list);
 			__blk_mq_requeue_request(rq);
 			break;
@@ -885,7 +997,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 	 */
 	if (!list_empty(list)) {
 		spin_lock(&hctx->lock);
-		list_splice(list, &hctx->dispatch);
+		list_splice_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
 
 		/*
@@ -896,47 +1008,17 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 		 * the requests in rq_list might get lost.
 		 *
 		 * blk_mq_run_hw_queue() already checks the STOPPED bit
-		 **/
-		blk_mq_run_hw_queue(hctx, true);
+		 *
+		 * If RESTART is set, then let completion restart the queue
+		 * instead of potentially looping here.
+		 */
+		if (!blk_mq_sched_needs_restart(hctx))
+			blk_mq_run_hw_queue(hctx, true);
 	}
 
 	return ret != BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-/*
- * Run this hardware queue, pulling any software queues mapped to it in.
- * Note that this function currently has various problems around ordering
- * of IO. In particular, we'd like FIFO behaviour on handling existing
- * items on the hctx->dispatch list. Ignore that for now.
- */
-static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
-{
-	LIST_HEAD(rq_list);
-
-	if (unlikely(blk_mq_hctx_stopped(hctx)))
-		return;
-
-	hctx->run++;
-
-	/*
-	 * Touch any software queue that has pending entries.
-	 */
-	flush_busy_ctxs(hctx, &rq_list);
-
-	/*
-	 * If we have previous entries on our dispatch list, grab them
-	 * and stuff them at the front for more fair dispatch.
-	 */
-	if (!list_empty_careful(&hctx->dispatch)) {
-		spin_lock(&hctx->lock);
-		if (!list_empty(&hctx->dispatch))
-			list_splice_init(&hctx->dispatch, &rq_list);
-		spin_unlock(&hctx->lock);
-	}
-
-	blk_mq_dispatch_rq_list(hctx, &rq_list);
-}
-
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	int srcu_idx;
@@ -946,11 +1028,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
 	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
 		rcu_read_lock();
-		blk_mq_process_rq_list(hctx);
+		blk_mq_sched_dispatch_requests(hctx);
 		rcu_read_unlock();
 	} else {
 		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
-		blk_mq_process_rq_list(hctx);
+		blk_mq_sched_dispatch_requests(hctx);
 		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
 	}
 }
@@ -1006,8 +1088,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if ((!blk_mq_hctx_has_pending(hctx) &&
-		    list_empty_careful(&hctx->dispatch)) ||
+		if (!blk_mq_hctx_has_pending(hctx) ||
 		    blk_mq_hctx_stopped(hctx))
 			continue;
 
@@ -1116,6 +1197,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
 		return;
 
+	blk_mq_stop_hw_queue(hctx);
 	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
 			&hctx->delay_work, msecs_to_jiffies(msecs));
 }
@@ -1135,8 +1217,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
 }
 
-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-				    struct request *rq, bool at_head)
+void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+			     bool at_head)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
@@ -1144,32 +1226,10 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
-void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
-			   bool async)
-{
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	spin_lock(&ctx->lock);
-	__blk_mq_insert_request(hctx, rq, at_head);
-	spin_unlock(&ctx->lock);
-
-	if (run_queue)
-		blk_mq_run_hw_queue(hctx, async);
-}
-
-static void blk_mq_insert_requests(struct request_queue *q,
-				     struct blk_mq_ctx *ctx,
-				     struct list_head *list,
-				     int depth,
-				     bool from_schedule)
+void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+			    struct list_head *list)
 
 {
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	trace_block_unplug(q, depth, !from_schedule);
-
 	/*
 	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
 	 * offline now
@@ -1185,8 +1245,6 @@ static void blk_mq_insert_requests(struct request_queue *q,
 	}
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
-
-	blk_mq_run_hw_queue(hctx, from_schedule);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1222,9 +1280,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		BUG_ON(!rq->q);
 		if (rq->mq_ctx != this_ctx) {
 			if (this_ctx) {
-				blk_mq_insert_requests(this_q, this_ctx,
-							&ctx_list, depth,
-							from_schedule);
+				trace_block_unplug(this_q, depth, from_schedule);
+				blk_mq_sched_insert_requests(this_q, this_ctx,
+								&ctx_list,
+								from_schedule);
 			}
 
 			this_ctx = rq->mq_ctx;
@@ -1241,8 +1300,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	 * on 'ctx_list'. Do those.
 	 */
 	if (this_ctx) {
-		blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
-				       from_schedule);
+		trace_block_unplug(this_q, depth, from_schedule);
+		blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
+						from_schedule);
 	}
 }
 
@@ -1280,46 +1340,39 @@ insert_rq:
 		}
 
 		spin_unlock(&ctx->lock);
-		__blk_mq_free_request(hctx, ctx, rq);
+		__blk_mq_finish_request(hctx, ctx, rq);
 		return true;
 	}
 }
 
-static struct request *blk_mq_map_request(struct request_queue *q,
-					  struct bio *bio,
-					  struct blk_mq_alloc_data *data)
+static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	struct request *rq;
-
-	blk_queue_enter_live(q);
-	ctx = blk_mq_get_ctx(q);
-	hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	trace_block_getrq(q, bio, bio->bi_opf);
-	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
-	rq = __blk_mq_alloc_request(data, bio->bi_opf);
+	if (rq->tag != -1)
+		return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
 
-	data->hctx->queued++;
-	return rq;
+	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
 static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 {
-	int ret;
 	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
 		.list = NULL,
 		.last = 1
 	};
-	blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num);
+	struct blk_mq_hw_ctx *hctx;
+	blk_qc_t new_cookie;
+	int ret;
 
-	if (blk_mq_hctx_stopped(hctx))
+	if (q->elevator)
 		goto insert;
 
+	if (!blk_mq_get_driver_tag(rq, &hctx, false))
+		goto insert;
+
+	new_cookie = request_to_qc_t(hctx, rq);
+
 	/*
 	 * For OK queue, we are done. For error, kill it. Any other
 	 * error (busy), just add it to our list as we previously
@@ -1341,7 +1394,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 	}
 
 insert:
-	blk_mq_insert_request(rq, false, true, true);
+	blk_mq_sched_insert_request(rq, false, true, true, false);
 }
 
 /*
@@ -1352,8 +1405,8 @@ insert:
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
-	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
-	struct blk_mq_alloc_data data;
+	const int is_flush_fua = op_is_flush(bio->bi_opf);
+	struct blk_mq_alloc_data data = { .flags = 0 };
 	struct request *rq;
 	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
@@ -1374,9 +1427,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
 		return BLK_QC_T_NONE;
 
+	if (blk_mq_sched_bio_merge(q, bio))
+		return BLK_QC_T_NONE;
+
 	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
 
-	rq = blk_mq_map_request(q, bio, &data);
+	trace_block_getrq(q, bio, bio->bi_opf);
+
+	rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
 		__wbt_done(q->rq_wb, wb_acct);
 		return BLK_QC_T_NONE;
@@ -1384,12 +1442,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	wbt_track(&rq->issue_stat, wb_acct);
 
-	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
+	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
+		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	plug = current->plug;
@@ -1438,6 +1499,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		goto done;
 	}
 
+	if (q->elevator) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_sched_insert_request(rq, false, true,
+						!is_sync || is_flush_fua, true);
+		goto done;
+	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
 		/*
 		 * For a SYNC request, send it to the hardware immediately. For
@@ -1445,7 +1513,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 	blk_mq_put_ctx(data.ctx);
@@ -1460,10 +1527,10 @@ done:
 static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
-	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
-	struct blk_mq_alloc_data data;
+	struct blk_mq_alloc_data data = { .flags = 0 };
 	struct request *rq;
 	blk_qc_t cookie;
 	unsigned int wb_acct;
@@ -1483,9 +1550,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	} else
 		request_count = blk_plug_queued_count(q);
 
+	if (blk_mq_sched_bio_merge(q, bio))
+		return BLK_QC_T_NONE;
+
 	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
 
-	rq = blk_mq_map_request(q, bio, &data);
+	trace_block_getrq(q, bio, bio->bi_opf);
+
+	rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
 		__wbt_done(q->rq_wb, wb_acct);
 		return BLK_QC_T_NONE;
@@ -1493,12 +1565,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 
 	wbt_track(&rq->issue_stat, wb_acct);
 
-	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
+	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
+		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	/*
@@ -1535,6 +1610,13 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		return cookie;
 	}
 
+	if (q->elevator) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_sched_insert_request(rq, false, true,
+						!is_sync || is_flush_fua, true);
+		goto done;
+	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
 		/*
 		 * For a SYNC request, send it to the hardware immediately. For
@@ -1542,16 +1624,16 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 
 	blk_mq_put_ctx(data.ctx);
+done:
 	return cookie;
 }
 
-static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
-		struct blk_mq_tags *tags, unsigned int hctx_idx)
+void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+		     unsigned int hctx_idx)
 {
 	struct page *page;
 
@@ -1559,11 +1641,13 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 		int i;
 
 		for (i = 0; i < tags->nr_tags; i++) {
-			if (!tags->rqs[i])
+			struct request *rq = tags->static_rqs[i];
+
+			if (!rq)
 				continue;
-			set->ops->exit_request(set->driver_data, tags->rqs[i],
+			set->ops->exit_request(set->driver_data, rq,
 						hctx_idx, i);
-			tags->rqs[i] = NULL;
+			tags->static_rqs[i] = NULL;
 		}
 	}
 
@@ -1577,33 +1661,32 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 		kmemleak_free(page_address(page));
 		__free_pages(page, page->private);
 	}
+}
 
+void blk_mq_free_rq_map(struct blk_mq_tags *tags)
+{
 	kfree(tags->rqs);
+	tags->rqs = NULL;
+	kfree(tags->static_rqs);
+	tags->static_rqs = NULL;
 
 	blk_mq_free_tags(tags);
 }
 
-static size_t order_to_size(unsigned int order)
-{
-	return (size_t)PAGE_SIZE << order;
-}
-
-static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
-		unsigned int hctx_idx)
+struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
+					unsigned int hctx_idx,
+					unsigned int nr_tags,
+					unsigned int reserved_tags)
 {
 	struct blk_mq_tags *tags;
-	unsigned int i, j, entries_per_page, max_order = 4;
-	size_t rq_size, left;
 
-	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
+	tags = blk_mq_init_tags(nr_tags, reserved_tags,
 				set->numa_node,
 				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
-	INIT_LIST_HEAD(&tags->page_list);
-
-	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+	tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
 				 set->numa_node);
 	if (!tags->rqs) {
@@ -1611,15 +1694,40 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		return NULL;
 	}
 
+	tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
+				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
+				 set->numa_node);
+	if (!tags->static_rqs) {
+		kfree(tags->rqs);
+		blk_mq_free_tags(tags);
+		return NULL;
+	}
+
+	return tags;
+}
+
+static size_t order_to_size(unsigned int order)
+{
+	return (size_t)PAGE_SIZE << order;
+}
+
+int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+		     unsigned int hctx_idx, unsigned int depth)
+{
+	unsigned int i, j, entries_per_page, max_order = 4;
+	size_t rq_size, left;
+
+	INIT_LIST_HEAD(&tags->page_list);
+
 	/*
 	 * rq_size is the size of the request plus driver payload, rounded
 	 * to the cacheline size
 	 */
 	rq_size = round_up(sizeof(struct request) + set->cmd_size,
 				cache_line_size());
-	left = rq_size * set->queue_depth;
+	left = rq_size * depth;
 
-	for (i = 0; i < set->queue_depth; ) {
+	for (i = 0; i < depth; ) {
 		int this_order = max_order;
 		struct page *page;
 		int to_do;
@@ -1653,15 +1761,17 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		 */
 		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
 		entries_per_page = order_to_size(this_order) / rq_size;
-		to_do = min(entries_per_page, set->queue_depth - i);
+		to_do = min(entries_per_page, depth - i);
 		left -= to_do * rq_size;
 		for (j = 0; j < to_do; j++) {
-			tags->rqs[i] = p;
+			struct request *rq = p;
+
+			tags->static_rqs[i] = rq;
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
-						tags->rqs[i], hctx_idx, i,
+						rq, hctx_idx, i,
 						set->numa_node)) {
-					tags->rqs[i] = NULL;
+					tags->static_rqs[i] = NULL;
 					goto fail;
 				}
 			}
@@ -1670,11 +1780,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 			i++;
 		}
 	}
-	return tags;
+	return 0;
 
 fail:
-	blk_mq_free_rq_map(set, tags, hctx_idx);
-	return NULL;
+	blk_mq_free_rqs(set, tags, hctx_idx);
+	return -ENOMEM;
 }
 
 /*
@@ -1866,6 +1976,35 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	}
 }
 
+static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
+{
+	int ret = 0;
+
+	set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
+					set->queue_depth, set->reserved_tags);
+	if (!set->tags[hctx_idx])
+		return false;
+
+	ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
+				set->queue_depth);
+	if (!ret)
+		return true;
+
+	blk_mq_free_rq_map(set->tags[hctx_idx]);
+	set->tags[hctx_idx] = NULL;
+	return false;
+}
+
+static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
+					 unsigned int hctx_idx)
+{
+	if (set->tags[hctx_idx]) {
+		blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
+		blk_mq_free_rq_map(set->tags[hctx_idx]);
+		set->tags[hctx_idx] = NULL;
+	}
+}
+
 static void blk_mq_map_swqueue(struct request_queue *q,
 			       const struct cpumask *online_mask)
 {
@@ -1894,17 +2033,15 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 
 		hctx_idx = q->mq_map[i];
 		/* unmapped hw queue can be remapped after CPU topo changed */
-		if (!set->tags[hctx_idx]) {
-			set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx);
-
+		if (!set->tags[hctx_idx] &&
+		    !__blk_mq_alloc_rq_map(set, hctx_idx)) {
 			/*
 			 * If tags initialization fail for some hctx,
 			 * that hctx won't be brought online.  In this
 			 * case, remap the current ctx to hctx[0] which
 			 * is guaranteed to always have tags allocated
 			 */
-			if (!set->tags[hctx_idx])
-				q->mq_map[i] = 0;
+			q->mq_map[i] = 0;
 		}
 
 		ctx = per_cpu_ptr(q->queue_ctx, i);
@@ -1927,10 +2064,9 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 			 * fallback in case of a new remap fails
 			 * allocation
 			 */
-			if (i && set->tags[i]) {
-				blk_mq_free_rq_map(set, set->tags[i], i);
-				set->tags[i] = NULL;
-			}
+			if (i && set->tags[i])
+				blk_mq_free_map_and_requests(set, i);
+
 			hctx->tags = NULL;
 			continue;
 		}
@@ -2023,6 +2159,8 @@ void blk_mq_release(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
 
+	blk_mq_sched_teardown(q);
+
 	/* hctx kobj stays in hctx */
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx)
@@ -2097,10 +2235,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx = hctxs[j];
 
 		if (hctx) {
-			if (hctx->tags) {
-				blk_mq_free_rq_map(set, hctx->tags, j);
-				set->tags[j] = NULL;
-			}
+			if (hctx->tags)
+				blk_mq_free_map_and_requests(set, j);
 			blk_mq_exit_hctx(q, set, hctx, j);
 			free_cpumask_var(hctx->cpumask);
 			kobject_put(&hctx->kobj);
@@ -2181,6 +2317,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	mutex_unlock(&all_q_mutex);
 	put_online_cpus();
 
+	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
+		int ret;
+
+		ret = blk_mq_sched_init(q);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
 	return q;
 
 err_hctxs:
@@ -2279,10 +2423,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu)
  * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
  * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
  *
- * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in
- * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
- * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list
- * is ignored.
+ * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
+ * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
+ * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
+ * ignored.
  */
 static int blk_mq_queue_reinit_prepare(unsigned int cpu)
 {
@@ -2296,17 +2440,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
 
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		set->tags[i] = blk_mq_init_rq_map(set, i);
-		if (!set->tags[i])
+	for (i = 0; i < set->nr_hw_queues; i++)
+		if (!__blk_mq_alloc_rq_map(set, i))
 			goto out_unwind;
-	}
 
 	return 0;
 
 out_unwind:
 	while (--i >= 0)
-		blk_mq_free_rq_map(set, set->tags[i], i);
+		blk_mq_free_rq_map(set->tags[i]);
 
 	return -ENOMEM;
 }
@@ -2430,10 +2572,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
 	int i;
 
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (set->tags[i])
-			blk_mq_free_rq_map(set, set->tags[i], i);
-	}
+	for (i = 0; i < nr_cpu_ids; i++)
+		blk_mq_free_map_and_requests(set, i);
 
 	kfree(set->mq_map);
 	set->mq_map = NULL;
@@ -2449,14 +2589,28 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	struct blk_mq_hw_ctx *hctx;
 	int i, ret;
 
-	if (!set || nr > set->queue_depth)
+	if (!set)
 		return -EINVAL;
 
+	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+
 	ret = 0;
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx->tags)
 			continue;
-		ret = blk_mq_tag_update_depth(hctx->tags, nr);
+		/*
+		 * If we're using an MQ scheduler, just update the scheduler
+		 * queue depth. This is similar to what the old code would do.
+		 */
+		if (!hctx->sched_tags) {
+			ret = blk_mq_tag_update_depth(hctx, &hctx->tags,
+							min(nr, set->queue_depth),
+							false);
+		} else {
+			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
+							nr, true);
+		}
 		if (ret)
 			break;
 	}
@@ -2464,6 +2618,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	if (!ret)
 		q->nr_requests = nr;
 
+	blk_mq_unfreeze_queue(q);
+	blk_mq_start_stopped_hw_queues(q, true);
+
 	return ret;
 }
 
@@ -2649,7 +2806,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 		blk_flush_plug_list(plug, false);
 
 	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-	rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+	if (!blk_qc_t_is_internal(cookie))
+		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+	else
+		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
 
 	return __blk_mq_poll(hctx, rq);
 }
@@ -2667,6 +2827,8 @@ void blk_mq_enable_hotplug(void)
 
 static int __init blk_mq_init(void)
 {
+	blk_mq_debugfs_init();
+
 	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
 				blk_mq_hctx_notify_dead);
 

+ 77 - 0
block/blk-mq.h

@@ -32,7 +32,31 @@ void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
+void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
+bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+				bool wait);
 
+/*
+ * Internal helpers for allocating/freeing the request map
+ */
+void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+		     unsigned int hctx_idx);
+void blk_mq_free_rq_map(struct blk_mq_tags *tags);
+struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
+					unsigned int hctx_idx,
+					unsigned int nr_tags,
+					unsigned int reserved_tags);
+int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
+		     unsigned int hctx_idx, unsigned int depth);
+
+/*
+ * Internal helpers for request insertion into sw queues
+ */
+void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+				bool at_head);
+void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+				struct list_head *list);
 /*
  * CPU hotplug helpers
  */
@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 
+/*
+ * debugfs helpers
+ */
+#ifdef CONFIG_BLK_DEBUG_FS
+void blk_mq_debugfs_init(void);
+int blk_mq_debugfs_register(struct request_queue *q, const char *name);
+void blk_mq_debugfs_unregister(struct request_queue *q);
+int blk_mq_debugfs_register_hctxs(struct request_queue *q);
+void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
+#else
+static inline void blk_mq_debugfs_init(void)
+{
+}
+
+static inline int blk_mq_debugfs_register(struct request_queue *q,
+					  const char *name)
+{
+	return 0;
+}
+
+static inline void blk_mq_debugfs_unregister(struct request_queue *q)
+{
+}
+
+static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+{
+	return 0;
+}
+
+static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+{
+}
+#endif
+
 extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
 
 void blk_mq_release(struct request_queue *q);
@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
 	data->hctx = hctx;
 }
 
+static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
+{
+	if (data->flags & BLK_MQ_REQ_INTERNAL)
+		return data->hctx->sched_tags;
+
+	return data->hctx->tags;
+}
+
+/*
+ * Internal helpers for request allocation/init/free
+ */
+void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+			struct request *rq, unsigned int op);
+void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
+				struct request *rq);
+void blk_mq_finish_request(struct request *rq);
+struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+					unsigned int op);
+
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
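
blk_mq_tags_from_data() above is the single switch point that routes BLK_MQ_REQ_INTERNAL allocations to the scheduler's private sched_tags and everything else to the driver-visible set. A tiny userspace model of that flag dispatch; the types are simplified stand-ins, not kernel definitions:

#include <stdio.h>

#define REQ_INTERNAL (1u << 0)	/* stands in for BLK_MQ_REQ_INTERNAL */

struct hw_ctx { const char *tags, *sched_tags; };
struct alloc_data { unsigned flags; struct hw_ctx *hctx; };

static const char *tags_from_data(struct alloc_data *d)
{
	return (d->flags & REQ_INTERNAL) ? d->hctx->sched_tags
					 : d->hctx->tags;
}

int main(void)
{
	struct hw_ctx h = { "driver tag set", "scheduler tag set" };
	struct alloc_data d = { REQ_INTERNAL, &h };

	printf("%s\n", tags_from_data(&d));	/* "scheduler tag set" */
	return 0;
}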

+ 1 - 0
block/blk-tag.c

@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 	list_del_init(&rq->queuelist);
 	rq->rq_flags &= ~RQF_QUEUED;
 	rq->tag = -1;
+	rq->internal_tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
 		printk(KERN_ERR "%s: tag %d is missing\n",

+ 4 - 2
block/blk-throttle.c

@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg)
 	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
 	struct bio *bio;
 
-	if ((bio = throtl_peek_queued(&sq->queued[READ])))
+	bio = throtl_peek_queued(&sq->queued[READ]);
+	if (bio)
 		tg_may_dispatch(tg, bio, &read_wait);
 
-	if ((bio = throtl_peek_queued(&sq->queued[WRITE])))
+	bio = throtl_peek_queued(&sq->queued[WRITE]);
+	if (bio)
 		tg_may_dispatch(tg, bio, &write_wait);
 
 	min_wait = min(read_wait, write_wait);

+ 21 - 5
block/blk.h

@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			return NULL;
 		}
 		if (unlikely(blk_queue_bypass(q)) ||
-		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
+		    !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
 }
@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_activate_req_fn)
-		e->type->ops.elevator_activate_req_fn(q, rq);
+	if (e->type->ops.sq.elevator_activate_req_fn)
+		e->type->ops.sq.elevator_activate_req_fn(q, rq);
 }
 
 static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_deactivate_req_fn)
-		e->type->ops.elevator_deactivate_req_fn(q, rq);
+	if (e->type->ops.sq.elevator_deactivate_req_fn)
+		e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
@@ -263,6 +263,22 @@ void ioc_clear_queue(struct request_queue *q);
 
 int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
 
+/**
+ * rq_ioc - determine io_context for request allocation
+ * @bio: request being allocated is for this bio (can be %NULL)
+ *
+ * Determine io_context to use for request allocation for @bio.  May return
+ * %NULL if %current->io_context doesn't exist.
+ */
+static inline struct io_context *rq_ioc(struct bio *bio)
+{
+#ifdef CONFIG_BLK_CGROUP
+	if (bio && bio->bi_ioc)
+		return bio->bi_ioc;
+#endif
+	return current->io_context;
+}
+
 /**
  * create_io_context - try to create task->io_context
  * @gfp_mask: allocation mask
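
rq_ioc() moves into blk.h so that both the legacy and the new blk-mq allocation paths can share the same io_context fallback. A self-contained model of the lookup order it documents; the types below are illustrative stand-ins, not the kernel structures:

#include <stdio.h>

struct io_context { const char *owner; };

struct bio {
	struct io_context *bi_ioc;	/* models bio->bi_ioc (CONFIG_BLK_CGROUP) */
};

static struct io_context task_ioc = { "current task" };

/* Models rq_ioc(): the bio's context if present, else the current task's. */
static struct io_context *rq_ioc_model(struct bio *bio)
{
	if (bio && bio->bi_ioc)
		return bio->bi_ioc;
	return &task_ioc;
}

int main(void)
{
	struct io_context cg_ioc = { "cgroup writeback" };
	struct bio bio = { &cg_ioc };

	printf("%s\n", rq_ioc_model(&bio)->owner);	/* "cgroup writeback" */
	printf("%s\n", rq_ioc_model(NULL)->owner);	/* "current task" */
	return 0;
}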

+ 7 - 3
block/cfq-iosched.c

@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
 	if (!cfqg)
 		return NULL;
 
-	for_each_cfqg_st(cfqg, i, j, st)
-		if ((cfqq = cfq_rb_first(st)) != NULL)
+	for_each_cfqg_st(cfqg, i, j, st) {
+		cfqq = cfq_rb_first(st);
+		if (cfqq)
 			return cfqq;
+	}
 	return NULL;
 }
 
@@ -3864,6 +3866,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 		goto out;
 	}
 
+	/* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
+	cfqq->ioprio_class = IOPRIO_CLASS_NONE;
 	cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
 	cfq_init_prio_data(cfqq, cic);
 	cfq_link_cfqq_cfqg(cfqq, cfqg);
@@ -4837,7 +4841,7 @@ static struct elv_fs_entry cfq_attrs[] = {
 };
 
 static struct elevator_type iosched_cfq = {
-	.ops = {
+	.ops.sq = {
 		.elevator_merge_fn = 		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
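
The ".ops.sq = { ... }" initializers here and in deadline-iosched.c below follow from splitting elevator_type's ops into a union selected by uses_mq, which elevator.c consults before calling into ops.sq or ops.mq. A hedged sketch of that shape; signatures are collapsed to (void) for brevity, and the real definitions live in include/linux/elevator.h:

#include <stdbool.h>

/* Simplified: the real hooks take request_queue/request/bio arguments. */
struct elevator_type_sketch {
	union {
		struct {			/* legacy, single-queue path */
			int  (*elevator_merge_fn)(void);
			void (*elevator_add_req_fn)(void);
			int  (*elevator_init_fn)(void);
		} sq;
		struct {			/* blk-mq scheduling path */
			int  (*init_sched)(void);
			bool (*allow_merge)(void);
			void (*insert_requests)(void);
		} mq;
	} ops;
	unsigned uses_mq:1;	/* selects which union member is valid */
};

Nested designated initializers make the conversion mechanical: ".ops = { ... }" becomes ".ops.sq = { ... }" for legacy schedulers, while an mq scheduler such as mq-deadline fills in ".ops.mq" instead.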

+ 1 - 1
block/deadline-iosched.c

@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = {
 };
 
 static struct elevator_type iosched_deadline = {
-	.ops = {
+	.ops.sq = {
 		.elevator_merge_fn = 		deadline_merge,
 		.elevator_merged_fn =		deadline_merged_request,
 		.elevator_merge_req_fn =	deadline_merged_requests,

+ 180 - 64
block/elevator.c

@@ -40,6 +40,7 @@
 #include <trace/events/block.h>
 
 #include "blk.h"
+#include "blk-mq-sched.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
@@ -58,8 +59,10 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_allow_bio_merge_fn)
-		return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio);
+	if (e->uses_mq && e->type->ops.mq.allow_merge)
+		return e->type->ops.mq.allow_merge(q, rq, bio);
+	else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
+		return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
 
 	return 1;
 }
@@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
 	kobject_init(&eq->kobj, &elv_ktype);
 	mutex_init(&eq->sysfs_lock);
 	hash_init(eq->hash);
+	eq->uses_mq = e->uses_mq;
 
 	return eq;
 }
@@ -215,18 +219,32 @@ int elevator_init(struct request_queue *q, char *name)
 	}
 
 	if (!e) {
-		e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
+		if (q->mq_ops && q->nr_hw_queues == 1)
+			e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
+		else if (q->mq_ops)
+			e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
+		else
+			e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
+
 		if (!e) {
 			printk(KERN_ERR
 				"Default I/O scheduler not found. " \
-				"Using noop.\n");
+				"Using noop/none.\n");
 			e = elevator_get("noop", false);
 		}
 	}
 
-	err = e->ops.elevator_init_fn(q, e);
-	if (err)
+	if (e->uses_mq) {
+		err = blk_mq_sched_setup(q);
+		if (!err)
+			err = e->ops.mq.init_sched(q, e);
+	} else
+		err = e->ops.sq.elevator_init_fn(q, e);
+	if (err) {
+		if (e->uses_mq)
+			blk_mq_sched_teardown(q);
 		elevator_put(e);
+	}
 	return err;
 }
 EXPORT_SYMBOL(elevator_init);
@@ -234,8 +252,10 @@ EXPORT_SYMBOL(elevator_init);
 void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
-	if (e->type->ops.elevator_exit_fn)
-		e->type->ops.elevator_exit_fn(e);
+	if (e->uses_mq && e->type->ops.mq.exit_sched)
+		e->type->ops.mq.exit_sched(e);
+	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
+		e->type->ops.sq.elevator_exit_fn(e);
 	mutex_unlock(&e->sysfs_lock);
 
 	kobject_put(&e->kobj);
@@ -253,6 +273,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq)
 	if (ELV_ON_HASH(rq))
 		__elv_rqhash_del(rq);
 }
+EXPORT_SYMBOL_GPL(elv_rqhash_del);
 
 void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
@@ -262,6 +283,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq)
 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
 	rq->rq_flags |= RQF_HASHED;
 }
+EXPORT_SYMBOL_GPL(elv_rqhash_add);
 
 void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 {
@@ -443,8 +465,10 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		return ELEVATOR_BACK_MERGE;
 	}
 
-	if (e->type->ops.elevator_merge_fn)
-		return e->type->ops.elevator_merge_fn(q, req, bio);
+	if (e->uses_mq && e->type->ops.mq.request_merge)
+		return e->type->ops.mq.request_merge(q, req, bio);
+	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
+		return e->type->ops.sq.elevator_merge_fn(q, req, bio);
 
 	return ELEVATOR_NO_MERGE;
 }
@@ -456,8 +480,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
  *
  * Returns true if we merged, false otherwise
  */
-static bool elv_attempt_insert_merge(struct request_queue *q,
-				     struct request *rq)
+bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
 {
 	struct request *__rq;
 	bool ret;
@@ -495,8 +518,10 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_merged_fn)
-		e->type->ops.elevator_merged_fn(q, rq, type);
+	if (e->uses_mq && e->type->ops.mq.request_merged)
+		e->type->ops.mq.request_merged(q, rq, type);
+	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
+		e->type->ops.sq.elevator_merged_fn(q, rq, type);
 
 	if (type == ELEVATOR_BACK_MERGE)
 		elv_rqhash_reposition(q, rq);
@@ -508,10 +533,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
 	struct elevator_queue *e = q->elevator;
-	const int next_sorted = next->rq_flags & RQF_SORTED;
-
-	if (next_sorted && e->type->ops.elevator_merge_req_fn)
-		e->type->ops.elevator_merge_req_fn(q, rq, next);
+	bool next_sorted = false;
+
+	if (e->uses_mq && e->type->ops.mq.requests_merged)
+		e->type->ops.mq.requests_merged(q, rq, next);
+	else if (e->type->ops.sq.elevator_merge_req_fn) {
+		next_sorted = next->rq_flags & RQF_SORTED;
+		if (next_sorted)
+			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
+	}
 
 	elv_rqhash_reposition(q, rq);
 
@@ -528,8 +558,11 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_bio_merged_fn)
-		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
+	if (WARN_ON_ONCE(e->uses_mq))
+		return;
+
+	if (e->type->ops.sq.elevator_bio_merged_fn)
+		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
 }
 
 #ifdef CONFIG_PM
@@ -574,11 +607,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 
 void elv_drain_elevator(struct request_queue *q)
 {
+	struct elevator_queue *e = q->elevator;
 	static int printed;
 
+	if (WARN_ON_ONCE(e->uses_mq))
+		return;
+
 	lockdep_assert_held(q->queue_lock);
 
-	while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
+	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
 		;
 	if (q->nr_sorted && printed++ < 10) {
 		printk(KERN_ERR "%s: forced dispatching is broken "
@@ -653,7 +690,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 * rq cannot be accessed after calling
 		 * elevator_add_req_fn.
 		 */
-		q->elevator->type->ops.elevator_add_req_fn(q, rq);
+		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
 		break;
 
 	case ELEVATOR_INSERT_FLUSH:
@@ -682,8 +719,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_latter_req_fn)
-		return e->type->ops.elevator_latter_req_fn(q, rq);
+	if (e->uses_mq && e->type->ops.mq.next_request)
+		return e->type->ops.mq.next_request(q, rq);
+	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
+		return e->type->ops.sq.elevator_latter_req_fn(q, rq);
+
 	return NULL;
 }
 
@@ -691,8 +731,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_former_req_fn)
-		return e->type->ops.elevator_former_req_fn(q, rq);
+	if (e->uses_mq && e->type->ops.mq.former_request)
+		return e->type->ops.mq.former_request(q, rq);
+	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
+		return e->type->ops.sq.elevator_former_req_fn(q, rq);
 	return NULL;
 }
 
@@ -701,8 +743,11 @@ int elv_set_request(struct request_queue *q, struct request *rq,
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_set_req_fn)
-		return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
+	if (WARN_ON_ONCE(e->uses_mq))
+		return 0;
+
+	if (e->type->ops.sq.elevator_set_req_fn)
+		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
 	return 0;
 }
 
@@ -710,16 +755,22 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_put_req_fn)
-		e->type->ops.elevator_put_req_fn(rq);
+	if (WARN_ON_ONCE(e->uses_mq))
+		return;
+
+	if (e->type->ops.sq.elevator_put_req_fn)
+		e->type->ops.sq.elevator_put_req_fn(rq);
 }
 
 int elv_may_queue(struct request_queue *q, unsigned int op)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->type->ops.elevator_may_queue_fn)
-		return e->type->ops.elevator_may_queue_fn(q, op);
+	if (WARN_ON_ONCE(e->uses_mq))
+		return 0;
+
+	if (e->type->ops.sq.elevator_may_queue_fn)
+		return e->type->ops.sq.elevator_may_queue_fn(q, op);
 
 	return ELV_MQUEUE_MAY;
 }
@@ -728,14 +779,17 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
+	if (WARN_ON_ONCE(e->uses_mq))
+		return;
+
 	/*
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
 		if ((rq->rq_flags & RQF_SORTED) &&
-		    e->type->ops.elevator_completed_req_fn)
-			e->type->ops.elevator_completed_req_fn(q, rq);
+		    e->type->ops.sq.elevator_completed_req_fn)
+			e->type->ops.sq.elevator_completed_req_fn(q, rq);
 	}
 }
 
@@ -803,8 +857,8 @@ int elv_register_queue(struct request_queue *q)
 		}
 		kobject_uevent(&e->kobj, KOBJ_ADD);
 		e->registered = 1;
-		if (e->type->ops.elevator_registered_fn)
-			e->type->ops.elevator_registered_fn(q);
+		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
+			e->type->ops.sq.elevator_registered_fn(q);
 	}
 	return error;
 }
@@ -891,9 +945,14 @@ EXPORT_SYMBOL_GPL(elv_unregister);
 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
 	struct elevator_queue *old = q->elevator;
-	bool registered = old->registered;
+	bool old_registered = false;
 	int err;
 
+	if (q->mq_ops) {
+		blk_mq_freeze_queue(q);
+		blk_mq_quiesce_queue(q);
+	}
+
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data.
 	 * Block layer doesn't call into a quiesced elevator - all requests
@@ -901,42 +960,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
 	 * merge happens either.
 	 */
-	blk_queue_bypass_start(q);
+	if (old) {
+		old_registered = old->registered;
 
-	/* unregister and clear all auxiliary data of the old elevator */
-	if (registered)
-		elv_unregister_queue(q);
+		if (old->uses_mq)
+			blk_mq_sched_teardown(q);
 
-	spin_lock_irq(q->queue_lock);
-	ioc_clear_queue(q);
-	spin_unlock_irq(q->queue_lock);
+		if (!q->mq_ops)
+			blk_queue_bypass_start(q);
+
+		/* unregister and clear all auxiliary data of the old elevator */
+		if (old_registered)
+			elv_unregister_queue(q);
+
+		spin_lock_irq(q->queue_lock);
+		ioc_clear_queue(q);
+		spin_unlock_irq(q->queue_lock);
+	}
 
 	/* allocate, init and register new elevator */
-	err = new_e->ops.elevator_init_fn(q, new_e);
-	if (err)
-		goto fail_init;
+	if (new_e) {
+		if (new_e->uses_mq) {
+			err = blk_mq_sched_setup(q);
+			if (!err)
+				err = new_e->ops.mq.init_sched(q, new_e);
+		} else
+			err = new_e->ops.sq.elevator_init_fn(q, new_e);
+		if (err)
+			goto fail_init;
 
-	if (registered) {
 		err = elv_register_queue(q);
 		if (err)
 			goto fail_register;
-	}
+	} else
+		q->elevator = NULL;
 
 	/* done, kill the old one and finish */
-	elevator_exit(old);
-	blk_queue_bypass_end(q);
+	if (old) {
+		elevator_exit(old);
+		if (!q->mq_ops)
+			blk_queue_bypass_end(q);
+	}
 
-	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+	if (q->mq_ops) {
+		blk_mq_unfreeze_queue(q);
+		blk_mq_start_stopped_hw_queues(q, true);
+	}
+
+	if (new_e)
+		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+	else
+		blk_add_trace_msg(q, "elv switch: none");
 
 	return 0;
 
 fail_register:
+	if (q->mq_ops)
+		blk_mq_sched_teardown(q);
 	elevator_exit(q->elevator);
 fail_init:
 	/* switch failed, restore and re-register old elevator */
-	q->elevator = old;
-	elv_register_queue(q);
-	blk_queue_bypass_end(q);
+	if (old) {
+		q->elevator = old;
+		elv_register_queue(q);
+		if (!q->mq_ops)
+			blk_queue_bypass_end(q);
+	}
+	if (q->mq_ops) {
+		blk_mq_unfreeze_queue(q);
+		blk_mq_start_stopped_hw_queues(q, true);
+	}
 
 	return err;
 }
@@ -949,8 +1042,11 @@ static int __elevator_change(struct request_queue *q, const char *name)
 	char elevator_name[ELV_NAME_MAX];
 	struct elevator_type *e;
 
-	if (!q->elevator)
-		return -ENXIO;
+	/*
+	 * Special case for mq, turn off scheduling
+	 */
+	if (q->mq_ops && !strncmp(name, "none", 4))
+		return elevator_switch(q, NULL);
 
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	e = elevator_get(strstrip(elevator_name), true);
@@ -959,11 +1055,21 @@ static int __elevator_change(struct request_queue *q, const char *name)
 		return -EINVAL;
 	}
 
-	if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
+	if (q->elevator &&
+	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
 		elevator_put(e);
 		return 0;
 	}
 
+	if (!e->uses_mq && q->mq_ops) {
+		elevator_put(e);
+		return -EINVAL;
+	}
+	if (e->uses_mq && !q->mq_ops) {
+		elevator_put(e);
+		return -EINVAL;
+	}
+
 	return elevator_switch(q, e);
 }
 
@@ -985,7 +1091,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 {
 	int ret;
 
-	if (!q->elevator)
+	if (!(q->mq_ops || q->request_fn))
 		return count;
 
 	ret = __elevator_change(q, name);
@@ -999,24 +1105,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
 	struct elevator_queue *e = q->elevator;
-	struct elevator_type *elv;
+	struct elevator_type *elv = NULL;
 	struct elevator_type *__e;
 	int len = 0;
 
-	if (!q->elevator || !blk_queue_stackable(q))
+	if (!blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
-	elv = e->type;
+	if (!q->elevator)
+		len += sprintf(name+len, "[none] ");
+	else
+		elv = e->type;
 
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
-		if (!strcmp(elv->elevator_name, __e->elevator_name))
+		if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
-		else
+			continue;
+		}
+		if (__e->uses_mq && q->mq_ops)
+			len += sprintf(name+len, "%s ", __e->elevator_name);
+		else if (!__e->uses_mq && !q->mq_ops)
 			len += sprintf(name+len, "%s ", __e->elevator_name);
 	}
 	spin_unlock(&elv_list_lock);
 
+	if (q->mq_ops && q->elevator)
+		len += sprintf(name+len, "none");
+
 	len += sprintf(len+name, "\n");
 	return len;
 }
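
With __elevator_change() and elv_iosched_store() reworked as above, a blk-mq queue now accepts "none" and mq-capable scheduler names through the usual sysfs attribute. A minimal user-space sketch of driving that interface follows; the device name nvme0n1 is an assumption, substitute any blk-mq device:

/*
 * Minimal user-space sketch: switch an I/O scheduler through sysfs,
 * exercising the elv_iosched_store() path above. The device name
 * ("nvme0n1") is an assumption; substitute your own.
 */
#include <stdio.h>

int main(void)
{
	const char *attr = "/sys/block/nvme0n1/queue/scheduler";
	FILE *f = fopen(attr, "w");

	if (!f) {
		perror(attr);
		return 1;
	}
	/* "none" disables mq scheduling; "mq-deadline" enables it */
	fputs("mq-deadline", f);
	if (fclose(f) != 0) {
		perror(attr);
		return 1;
	}
	printf("switched %s to mq-deadline\n", attr);
	return 0;
}

Reading the same attribute afterwards shows the available schedulers with the active one bracketed, as produced by elv_iosched_show() above.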

+ 555 - 0
block/mq-deadline.c

@@ -0,0 +1,555 @@
+/*
+ *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
+ *  for the blk-mq scheduling framework
+ *
+ *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
+ */
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <linux/sbitmap.h>
+
+#include "blk.h"
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
+#include "blk-mq-sched.h"
+
+/*
+ * See Documentation/block/deadline-iosched.txt
+ */
+static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
+static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
+static const int writes_starved = 2;    /* max times reads can starve a write */
+static const int fifo_batch = 16;       /* # of sequential requests treated as one
+				     by the above parameters. For throughput. */
+
+struct deadline_data {
+	/*
+	 * run time data
+	 */
+
+	/*
+	 * requests (deadline_rq s) are present on both sort_list and fifo_list
+	 */
+	struct rb_root sort_list[2];
+	struct list_head fifo_list[2];
+
+	/*
+	 * next in sort order. read, write or both are NULL
+	 */
+	struct request *next_rq[2];
+	unsigned int batching;		/* number of sequential requests made */
+	unsigned int starved;		/* times reads have starved writes */
+
+	/*
+	 * settings that change how the i/o scheduler behaves
+	 */
+	int fifo_expire[2];
+	int fifo_batch;
+	int writes_starved;
+	int front_merges;
+
+	spinlock_t lock;
+	struct list_head dispatch;
+};
+
+static inline struct rb_root *
+deadline_rb_root(struct deadline_data *dd, struct request *rq)
+{
+	return &dd->sort_list[rq_data_dir(rq)];
+}
+
+/*
+ * get the request after `rq' in sector-sorted order
+ */
+static inline struct request *
+deadline_latter_request(struct request *rq)
+{
+	struct rb_node *node = rb_next(&rq->rb_node);
+
+	if (node)
+		return rb_entry_rq(node);
+
+	return NULL;
+}
+
+static void
+deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
+{
+	struct rb_root *root = deadline_rb_root(dd, rq);
+
+	elv_rb_add(root, rq);
+}
+
+static inline void
+deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
+{
+	const int data_dir = rq_data_dir(rq);
+
+	if (dd->next_rq[data_dir] == rq)
+		dd->next_rq[data_dir] = deadline_latter_request(rq);
+
+	elv_rb_del(deadline_rb_root(dd, rq), rq);
+}
+
+/*
+ * remove rq from rbtree and fifo.
+ */
+static void deadline_remove_request(struct request_queue *q, struct request *rq)
+{
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	list_del_init(&rq->queuelist);
+
+	/*
+	 * We might not be on the rbtree, if we are doing an insert merge
+	 */
+	if (!RB_EMPTY_NODE(&rq->rb_node))
+		deadline_del_rq_rb(dd, rq);
+
+	elv_rqhash_del(q, rq);
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+}
+
+static void dd_request_merged(struct request_queue *q, struct request *req,
+			      int type)
+{
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	/*
+	 * if the merge was a front merge, we need to reposition request
+	 */
+	if (type == ELEVATOR_FRONT_MERGE) {
+		elv_rb_del(deadline_rb_root(dd, req), req);
+		deadline_add_rq_rb(dd, req);
+	}
+}
+
+static void dd_merged_requests(struct request_queue *q, struct request *req,
+			       struct request *next)
+{
+	/*
+	 * if next expires before rq, assign its expire time to rq
+	 * and move into next position (next will be deleted) in fifo
+	 */
+	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
+		if (time_before((unsigned long)next->fifo_time,
+				(unsigned long)req->fifo_time)) {
+			list_move(&req->queuelist, &next->queuelist);
+			req->fifo_time = next->fifo_time;
+		}
+	}
+
+	/*
+	 * kill knowledge of next, this one is a goner
+	 */
+	deadline_remove_request(q, next);
+}
+
+/*
+ * move an entry to dispatch queue
+ */
+static void
+deadline_move_request(struct deadline_data *dd, struct request *rq)
+{
+	const int data_dir = rq_data_dir(rq);
+
+	dd->next_rq[READ] = NULL;
+	dd->next_rq[WRITE] = NULL;
+	dd->next_rq[data_dir] = deadline_latter_request(rq);
+
+	/*
+	 * take it off the sort and fifo list
+	 */
+	deadline_remove_request(rq->q, rq);
+}
+
+/*
+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
+ * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
+ */
+static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
+{
+	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
+
+	/*
+	 * rq is expired!
+	 */
+	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * deadline_dispatch_requests selects the best request according to
+ * read/write expire, fifo_batch, etc
+ */
+static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+	struct request *rq;
+	bool reads, writes;
+	int data_dir;
+
+	if (!list_empty(&dd->dispatch)) {
+		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		goto done;
+	}
+
+	reads = !list_empty(&dd->fifo_list[READ]);
+	writes = !list_empty(&dd->fifo_list[WRITE]);
+
+	/*
+	 * batches are currently reads XOR writes
+	 */
+	if (dd->next_rq[WRITE])
+		rq = dd->next_rq[WRITE];
+	else
+		rq = dd->next_rq[READ];
+
+	if (rq && dd->batching < dd->fifo_batch)
+		/* we have a next request and are still entitled to batch */
+		goto dispatch_request;
+
+	/*
+	 * at this point we are not running a batch. select the appropriate
+	 * data direction (read / write)
+	 */
+
+	if (reads) {
+		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
+
+		if (writes && (dd->starved++ >= dd->writes_starved))
+			goto dispatch_writes;
+
+		data_dir = READ;
+
+		goto dispatch_find_request;
+	}
+
+	/*
+	 * there are either no reads, or writes have been starved
+	 */
+
+	if (writes) {
+dispatch_writes:
+		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
+
+		dd->starved = 0;
+
+		data_dir = WRITE;
+
+		goto dispatch_find_request;
+	}
+
+	return NULL;
+
+dispatch_find_request:
+	/*
+	 * we are not running a batch, find best request for selected data_dir
+	 */
+	if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
+		/*
+		 * A deadline has expired, the last request was in the other
+		 * direction, or we have run out of higher-sectored requests.
+		 * Start again from the request with the earliest expiry time.
+		 */
+		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+	} else {
+		/*
+		 * The last req was the same dir and we have a next request in
+		 * sort order. No expired requests so continue on from here.
+		 */
+		rq = dd->next_rq[data_dir];
+	}
+
+	dd->batching = 0;
+
+dispatch_request:
+	/*
+	 * rq is the selected appropriate request.
+	 */
+	dd->batching++;
+	deadline_move_request(dd, rq);
+done:
+	rq->rq_flags |= RQF_STARTED;
+	return rq;
+}
+
+static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
+{
+	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+	struct request *rq;
+
+	spin_lock(&dd->lock);
+	rq = __dd_dispatch_request(hctx);
+	spin_unlock(&dd->lock);
+
+	return rq;
+}
+
+static void dd_exit_queue(struct elevator_queue *e)
+{
+	struct deadline_data *dd = e->elevator_data;
+
+	BUG_ON(!list_empty(&dd->fifo_list[READ]));
+	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
+
+	kfree(dd);
+}
+
+/*
+ * initialize elevator private data (deadline_data).
+ */
+static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
+{
+	struct deadline_data *dd;
+	struct elevator_queue *eq;
+
+	eq = elevator_alloc(q, e);
+	if (!eq)
+		return -ENOMEM;
+
+	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
+	if (!dd) {
+		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
+	eq->elevator_data = dd;
+
+	INIT_LIST_HEAD(&dd->fifo_list[READ]);
+	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
+	dd->sort_list[READ] = RB_ROOT;
+	dd->sort_list[WRITE] = RB_ROOT;
+	dd->fifo_expire[READ] = read_expire;
+	dd->fifo_expire[WRITE] = write_expire;
+	dd->writes_starved = writes_starved;
+	dd->front_merges = 1;
+	dd->fifo_batch = fifo_batch;
+	spin_lock_init(&dd->lock);
+	INIT_LIST_HEAD(&dd->dispatch);
+
+	q->elevator = eq;
+	return 0;
+}
+
+static int dd_request_merge(struct request_queue *q, struct request **rq,
+			    struct bio *bio)
+{
+	struct deadline_data *dd = q->elevator->elevator_data;
+	sector_t sector = bio_end_sector(bio);
+	struct request *__rq;
+
+	if (!dd->front_merges)
+		return ELEVATOR_NO_MERGE;
+
+	__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
+	if (__rq) {
+		BUG_ON(sector != blk_rq_pos(__rq));
+
+		if (elv_bio_merge_ok(__rq, bio)) {
+			*rq = __rq;
+			return ELEVATOR_FRONT_MERGE;
+		}
+	}
+
+	return ELEVATOR_NO_MERGE;
+}
+
+static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct request_queue *q = hctx->queue;
+	struct deadline_data *dd = q->elevator->elevator_data;
+	int ret;
+
+	spin_lock(&dd->lock);
+	ret = blk_mq_sched_try_merge(q, bio);
+	spin_unlock(&dd->lock);
+
+	return ret;
+}
+
+/*
+ * add rq to rbtree and fifo
+ */
+static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+			      bool at_head)
+{
+	struct request_queue *q = hctx->queue;
+	struct deadline_data *dd = q->elevator->elevator_data;
+	const int data_dir = rq_data_dir(rq);
+
+	if (blk_mq_sched_try_insert_merge(q, rq))
+		return;
+
+	blk_mq_sched_request_inserted(rq);
+
+	if (blk_mq_sched_bypass_insert(hctx, rq))
+		return;
+
+	if (at_head || rq->cmd_type != REQ_TYPE_FS) {
+		if (at_head)
+			list_add(&rq->queuelist, &dd->dispatch);
+		else
+			list_add_tail(&rq->queuelist, &dd->dispatch);
+	} else {
+		deadline_add_rq_rb(dd, rq);
+
+		if (rq_mergeable(rq)) {
+			elv_rqhash_add(q, rq);
+			if (!q->last_merge)
+				q->last_merge = rq;
+		}
+
+		/*
+		 * set expire time and add to fifo list
+		 */
+		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
+		list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
+	}
+}
+
+static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
+			       struct list_head *list, bool at_head)
+{
+	struct request_queue *q = hctx->queue;
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	spin_lock(&dd->lock);
+	while (!list_empty(list)) {
+		struct request *rq;
+
+		rq = list_first_entry(list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		dd_insert_request(hctx, rq, at_head);
+	}
+	spin_unlock(&dd->lock);
+}
+
+static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
+{
+	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+
+	return !list_empty_careful(&dd->dispatch) ||
+		!list_empty_careful(&dd->fifo_list[0]) ||
+		!list_empty_careful(&dd->fifo_list[1]);
+}
+
+/*
+ * sysfs parts below
+ */
+static ssize_t
+deadline_var_show(int var, char *page)
+{
+	return sprintf(page, "%d\n", var);
+}
+
+static ssize_t
+deadline_var_store(int *var, const char *page, size_t count)
+{
+	char *p = (char *) page;
+
+	*var = simple_strtol(p, &p, 10);
+	return count;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
+{									\
+	struct deadline_data *dd = e->elevator_data;			\
+	int __data = __VAR;						\
+	if (__CONV)							\
+		__data = jiffies_to_msecs(__data);			\
+	return deadline_var_show(__data, (page));			\
+}
+SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
+SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
+SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
+SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
+SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
+{									\
+	struct deadline_data *dd = e->elevator_data;			\
+	int __data;							\
+	int ret = deadline_var_store(&__data, (page), count);		\
+	if (__data < (MIN))						\
+		__data = (MIN);						\
+	else if (__data > (MAX))					\
+		__data = (MAX);						\
+	if (__CONV)							\
+		*(__PTR) = msecs_to_jiffies(__data);			\
+	else								\
+		*(__PTR) = __data;					\
+	return ret;							\
+}
+STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
+STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
+STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
+#undef STORE_FUNCTION
+
+#define DD_ATTR(name) \
+	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
+				      deadline_##name##_store)
+
+static struct elv_fs_entry deadline_attrs[] = {
+	DD_ATTR(read_expire),
+	DD_ATTR(write_expire),
+	DD_ATTR(writes_starved),
+	DD_ATTR(front_merges),
+	DD_ATTR(fifo_batch),
+	__ATTR_NULL
+};
+
+static struct elevator_type mq_deadline = {
+	.ops.mq = {
+		.insert_requests	= dd_insert_requests,
+		.dispatch_request	= dd_dispatch_request,
+		.next_request		= elv_rb_latter_request,
+		.former_request		= elv_rb_former_request,
+		.bio_merge		= dd_bio_merge,
+		.request_merge		= dd_request_merge,
+		.requests_merged	= dd_merged_requests,
+		.request_merged		= dd_request_merged,
+		.has_work		= dd_has_work,
+		.init_sched		= dd_init_queue,
+		.exit_sched		= dd_exit_queue,
+	},
+
+	.uses_mq	= true,
+	.elevator_attrs = deadline_attrs,
+	.elevator_name = "mq-deadline",
+	.elevator_owner = THIS_MODULE,
+};
+
+static int __init deadline_init(void)
+{
+	return elv_register(&mq_deadline);
+}
+
+static void __exit deadline_exit(void)
+{
+	elv_unregister(&mq_deadline);
+}
+
+module_init(deadline_init);
+module_exit(deadline_exit);
+
+MODULE_AUTHOR("Jens Axboe");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MQ deadline IO scheduler");
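
The dispatch policy above (write-preferred batch continuation, the writes_starved counter, FIFO expiry) is compact enough to model outside the kernel. Below is a user-space sketch of the direction-selection step of __dd_dispatch_request(); the reduced state and all names are local to this sketch:

/*
 * User-space model of the direction-selection step in
 * __dd_dispatch_request() above. The state is reduced to booleans
 * and counters; a sketch of the policy, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

enum dir { DIR_READ = 0, DIR_WRITE = 1, DIR_NONE = -1 };

struct model {
	bool fifo_nonempty[2];		/* !list_empty(&dd->fifo_list[d]) */
	bool has_next[2];		/* dd->next_rq[d] != NULL */
	int batching, fifo_batch;
	int starved, writes_starved;
};

static enum dir dd_pick_dir(struct model *m)
{
	/* continue the current batch while still entitled to */
	if (m->batching < m->fifo_batch) {
		if (m->has_next[DIR_WRITE])
			return DIR_WRITE;
		if (m->has_next[DIR_READ])
			return DIR_READ;
	}

	if (m->fifo_nonempty[DIR_READ]) {
		/* reads win until writes have been starved long enough */
		if (m->fifo_nonempty[DIR_WRITE] &&
		    m->starved++ >= m->writes_starved)
			goto dispatch_writes;
		return DIR_READ;
	}

	if (m->fifo_nonempty[DIR_WRITE]) {
dispatch_writes:
		m->starved = 0;
		return DIR_WRITE;
	}

	return DIR_NONE;
}

int main(void)
{
	struct model m = {
		.fifo_nonempty = { true, true },
		.fifo_batch = 16,
		.writes_starved = 2,	/* default of the tunable above */
	};

	/* two reads, then the starved write, then reads again */
	for (int i = 0; i < 4; i++)
		printf("pick=%d starved=%d\n", dd_pick_dir(&m), m.starved);
	return 0;
}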

+ 1 - 1
block/noop-iosched.c

@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e)
 }
 
 static struct elevator_type elevator_noop = {
-	.ops = {
+	.ops.sq = {
 		.elevator_merge_req_fn		= noop_merged_requests,
 		.elevator_dispatch_fn		= noop_dispatch,
 		.elevator_add_req_fn		= noop_add_request,

+ 12 - 5
block/partitions/efi.c

@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
 	if (!gpt)
 		return NULL;
 
-	count = le32_to_cpu(gpt->num_partition_entries) *
+	count = (size_t)le32_to_cpu(gpt->num_partition_entries) *
                 le32_to_cpu(gpt->sizeof_partition_entry);
 	if (!count)
 		return NULL;
@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
 			gpt_header **gpt, gpt_entry **ptes)
 {
 	u32 crc, origcrc;
-	u64 lastlba;
+	u64 lastlba, pt_size;
 
 	if (!ptes)
 		return 0;
@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
 		goto fail;
 	}
 
+	/* Sanity check partition table size */
+	pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) *
+		le32_to_cpu((*gpt)->sizeof_partition_entry);
+	if (pt_size > KMALLOC_MAX_SIZE) {
+		pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n",
+			 (unsigned long long)pt_size, KMALLOC_MAX_SIZE);
+		goto fail;
+	}
+
 	if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
 		goto fail;
 
 	/* Check the GUID Partition Entry Array CRC */
-	crc = efi_crc32((const unsigned char *) (*ptes),
-			le32_to_cpu((*gpt)->num_partition_entries) *
-			le32_to_cpu((*gpt)->sizeof_partition_entry));
+	crc = efi_crc32((const unsigned char *) (*ptes), pt_size);
 
 	if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
 		pr_debug("GUID Partition Entry Array CRC check failed.\n");
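
Both hunks above widen the num_partition_entries * sizeof_partition_entry product before it is used, so a crafted GPT header cannot wrap a 32-bit multiplication into a small allocation. A stand-alone illustration of the failure mode the casts prevent, with hypothetical field values:

/*
 * Why the (u64)/(size_t) casts above matter: both GPT fields are
 * 32-bit, so their product can exceed UINT32_MAX. The values here
 * are hypothetical, chosen to wrap a 32-bit multiply.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nr_entries = 0x01000000;	/* 16M entries */
	uint32_t entry_size = 0x100;		/* 256 bytes each */

	uint32_t narrow = nr_entries * entry_size;	   /* wraps to 0 */
	uint64_t wide = (uint64_t)nr_entries * entry_size; /* 4 GiB */

	printf("32-bit product: %u\n", narrow);
	printf("64-bit product: %llu\n", (unsigned long long)wide);
	return 0;
}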

+ 1 - 1
drivers/block/floppy.c

@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param,
 	*rcmd = NULL;
 
 loop:
-	ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER);
+	ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL);
 	if (!ptr)
 		return -ENOMEM;
 	*rcmd = ptr;

+ 1 - 1
drivers/md/bcache/request.c

@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->iop.write_prio	= 0;
 	s->iop.error		= 0;
 	s->iop.flags		= 0;
-	s->iop.flush_journal	= (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0;
+	s->iop.flush_journal	= op_is_flush(bio->bi_opf);
 	s->iop.wq		= bcache_wq;
 
 	return s;

+ 3 - 10
drivers/md/dm-cache-target.c

@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
 	spin_lock_irqsave(&cache->lock, flags);
-	if (cache->need_tick_bio &&
-	    !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
+	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
 	    bio_op(bio) != REQ_OP_DISCARD) {
 		pb->tick = true;
 		cache->need_tick_bio = false;
@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 	return to_oblock(block_nr);
 }
 
-static int bio_triggers_commit(struct cache *cache, struct bio *bio)
-{
-	return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
-}
-
 /*
  * You must increment the deferred set whilst the prison cell is held.  To
  * encourage this, we ask for 'cell' to be passed in.
@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio)
 {
 	unsigned long flags;
 
-	if (!bio_triggers_commit(cache, bio)) {
+	if (!op_is_flush(bio->bi_opf)) {
 		accounted_request(cache, bio);
 		return;
 	}
@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache)
 
 static bool discard_or_flush(struct bio *bio)
 {
-	return bio_op(bio) == REQ_OP_DISCARD ||
-	       bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
 }
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)

+ 5 - 8
drivers/md/dm-thin.c

@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
 
 static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
 {
-	return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
+	return op_is_flush(bio->bi_opf) &&
 		dm_thin_changed_this_transaction(tc->td);
 }
 
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&cell->bios))) {
-		if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		    bio_op(bio) == REQ_OP_DISCARD)
+		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
 			bio_list_add(&info->defer_bios, bio);
 		else {
 			inc_all_io_entry(info->tc->pool, bio);
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&cell->bios))) {
-		if ((bio_data_dir(bio) == WRITE) ||
-		    (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		     bio_op(bio) == REQ_OP_DISCARD))
+		if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
+		    bio_op(bio) == REQ_OP_DISCARD)
 			bio_list_add(&info->defer_bios, bio);
 		else {
 			struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_SUBMITTED;
 	}
 
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-	    bio_op(bio) == REQ_OP_DISCARD) {
+	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
 		thin_defer_bio_with_throttle(tc, bio);
 		return DM_MAPIO_SUBMITTED;
 	}

+ 1 - 0
drivers/nvme/host/pci.c

@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
 		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
+		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
 		dev->admin_tagset.driver_data = dev;
 
 		if (blk_mq_alloc_tag_set(&dev->admin_tagset))

+ 6 - 3
include/linux/blk-mq.h

@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
+	void			*sched_data;
 	struct request_queue	*queue;
 	struct blk_flush_queue	*fq;
 
@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
+	struct blk_mq_tags	*sched_tags;
 
 	struct srcu_struct	queue_rq_srcu;
 
@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx {
 
 struct blk_mq_tag_set {
 	unsigned int		*mq_map;
-	struct blk_mq_ops	*ops;
+	const struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
 	unsigned int		queue_depth;	/* max hw supported */
 	unsigned int		reserved_tags;
@@ -151,11 +153,13 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
 	BLK_MQ_F_BLOCKING	= 1 << 5,
+	BLK_MQ_F_NO_SCHED	= 1 << 6,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
+	BLK_MQ_S_SCHED_RESTART	= 2,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
-void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_free_request(struct request *rq);
-void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 
 enum {
 	BLK_MQ_REQ_NOWAIT	= (1 << 0), /* return when out of requests */
 	BLK_MQ_REQ_RESERVED	= (1 << 1), /* allocate from reserved pool */
+	BLK_MQ_REQ_INTERNAL	= (1 << 2), /* allocate internal/sched tag */
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,

+ 26 - 5
include/linux/blk_types.h

@@ -220,6 +220,15 @@ static inline bool op_is_write(unsigned int op)
 	return (op & 1);
 }
 
+/*
+ * Check if the bio or request is one that needs special treatment in the
+ * flush state machine.
+ */
+static inline bool op_is_flush(unsigned int op)
+{
+	return op & (REQ_FUA | REQ_PREFLUSH);
+}
+
 /*
  * Reads are always treated as synchronous, as are requests with the FUA or
  * PREFLUSH flag.  Other operations may be marked as synchronous using the
@@ -232,22 +241,29 @@ static inline bool op_is_sync(unsigned int op)
 }
 
 typedef unsigned int blk_qc_t;
-#define BLK_QC_T_NONE	-1U
-#define BLK_QC_T_SHIFT	16
+#define BLK_QC_T_NONE		-1U
+#define BLK_QC_T_SHIFT		16
+#define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
 	return cookie != BLK_QC_T_NONE;
 }
 
-static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num)
+static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
+				       bool internal)
 {
-	return tag | (queue_num << BLK_QC_T_SHIFT);
+	blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
+
+	if (internal)
+		ret |= BLK_QC_T_INTERNAL;
+
+	return ret;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
 {
-	return cookie >> BLK_QC_T_SHIFT;
+	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
 }
 
 static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
@@ -255,6 +271,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 }
 
+static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
+{
+	return (cookie & BLK_QC_T_INTERNAL) != 0;
+}
+
 struct blk_issue_stat {
 	u64 time;
 };
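
The widened blk_qc_t helpers above pack the tag in bits 0-15, the queue number in bits 16-30, and the new internal-tag flag in bit 31. A stand-alone round-trip of the same bit layout; the qc_* names are local to this sketch:

/*
 * Round-trip of the blk_qc_t layout above:
 * bits 0..15 tag, bits 16..30 queue number, bit 31 "internal".
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define QC_SHIFT	16
#define QC_INTERNAL	(1U << 31)

static uint32_t qc_encode(unsigned int tag, unsigned int queue, bool internal)
{
	uint32_t ret = tag | (queue << QC_SHIFT);

	if (internal)
		ret |= QC_INTERNAL;
	return ret;
}

int main(void)
{
	uint32_t c = qc_encode(42, 3, true);

	assert(((c & ~QC_INTERNAL) >> QC_SHIFT) == 3);	/* queue_num */
	assert((c & ((1U << QC_SHIFT) - 1)) == 42);	/* tag */
	assert((c & QC_INTERNAL) != 0);			/* internal */
	return 0;
}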

+ 31 - 3
include/linux/blkdev.h

@@ -154,6 +154,7 @@ struct request {
 
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
+	int tag;
 	sector_t __sector;		/* sector cursor */
 
 	struct bio *bio;
@@ -220,9 +221,10 @@ struct request {
 
 	unsigned short ioprio;
 
+	int internal_tag;
+
 	void *special;		/* opaque pointer available for LLD use */
 
-	int tag;
 	int errors;
 
 	/*
@@ -407,7 +409,7 @@ struct request_queue {
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
 
-	struct blk_mq_ops	*mq_ops;
+	const struct blk_mq_ops	*mq_ops;
 
 	unsigned int		*mq_map;
 
@@ -569,6 +571,11 @@ struct request_queue {
 	struct list_head	tag_set_list;
 	struct bio_set		*bio_split;
 
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		*debugfs_dir;
+	struct dentry		*mq_debugfs_dir;
+#endif
+
 	bool			mq_sysfs_init_done;
 };
 
@@ -600,6 +607,7 @@ struct request_queue {
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
+#define QUEUE_FLAG_RESTART     28	/* queue needs restart at completion */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -1620,6 +1628,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
+/*
+ * Check if the two bvecs from two bios can be merged to one segment.
+ * If yes, no need to check gap between the two bios since the 1st bio
+ * and the 1st bvec in the 2nd bio can be handled in one segment.
+ */
+static inline bool bios_segs_mergeable(struct request_queue *q,
+		struct bio *prev, struct bio_vec *prev_last_bv,
+		struct bio_vec *next_first_bv)
+{
+	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
+		return false;
+	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
+		return false;
+	if (prev->bi_seg_back_size + next_first_bv->bv_len >
+			queue_max_segment_size(q))
+		return false;
+	return true;
+}
+
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
@@ -1629,7 +1656,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 		bio_get_last_bvec(prev, &pb);
 		bio_get_first_bvec(next, &nb);
 
-		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+		if (!bios_segs_mergeable(q, prev, &pb, &nb))
+			return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
 	}
 
 	return false;
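
bios_segs_mergeable() above gates the gap check behind three conditions: physical contiguity, the DMA segment boundary, and the maximum segment size. A user-space sketch of that control flow, with the kernel's BIOVEC_PHYS_MERGEABLE/BIOVEC_SEG_BOUNDARY macros reduced to stub predicates; a sketch of the shape only, not the real bvec handling:

/*
 * Shape of the bios_segs_mergeable() gate above, with the kernel
 * predicates stubbed out. All names are local to this sketch.
 */
#include <stdbool.h>

struct seg {
	unsigned long phys_start;	/* physical start of the segment */
	unsigned long phys_end;		/* physical address just past it */
	unsigned int len;
};

static bool phys_mergeable(const struct seg *a, const struct seg *b)
{
	return a->phys_end == b->phys_start;	/* physically contiguous */
}

static bool segs_mergeable(const struct seg *prev_last,
			   const struct seg *next_first,
			   unsigned int prev_back_size,
			   unsigned int max_seg_size)
{
	if (!phys_mergeable(prev_last, next_first))
		return false;
	/* the kernel also checks the DMA segment boundary here */
	if (prev_back_size + next_first->len > max_seg_size)
		return false;
	return true;
}

int main(void)
{
	struct seg a = { .phys_start = 0x1000, .phys_end = 0x2000, .len = 0x1000 };
	struct seg b = { .phys_start = 0x2000, .phys_end = 0x2200, .len = 0x200 };

	/* contiguous and small enough: one segment, no gap check needed */
	return segs_mergeable(&a, &b, 0x1000, 0x10000) ? 0 : 1;
}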

+ 35 - 1
include/linux/elevator.h

@@ -77,6 +77,34 @@ struct elevator_ops
 	elevator_registered_fn *elevator_registered_fn;
 };
 
+struct blk_mq_alloc_data;
+struct blk_mq_hw_ctx;
+
+struct elevator_mq_ops {
+	int (*init_sched)(struct request_queue *, struct elevator_type *);
+	void (*exit_sched)(struct elevator_queue *);
+
+	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
+	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
+	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
+	void (*request_merged)(struct request_queue *, struct request *, int);
+	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
+	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
+	void (*put_request)(struct request *);
+	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
+	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
+	bool (*has_work)(struct blk_mq_hw_ctx *);
+	void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
+	void (*started_request)(struct request *);
+	void (*requeue_request)(struct request *);
+	struct request *(*former_request)(struct request_queue *, struct request *);
+	struct request *(*next_request)(struct request_queue *, struct request *);
+	int (*get_rq_priv)(struct request_queue *, struct request *);
+	void (*put_rq_priv)(struct request_queue *, struct request *);
+	void (*init_icq)(struct io_cq *);
+	void (*exit_icq)(struct io_cq *);
+};
+
 #define ELV_NAME_MAX	(16)
 
 struct elv_fs_entry {
@@ -94,12 +122,16 @@ struct elevator_type
 	struct kmem_cache *icq_cache;
 
 	/* fields provided by elevator implementation */
-	struct elevator_ops ops;
+	union {
+		struct elevator_ops sq;
+		struct elevator_mq_ops mq;
+	} ops;
 	size_t icq_size;	/* see iocontext.h */
 	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+	bool uses_mq;
 
 	/* managed by elevator core */
 	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
@@ -123,6 +155,7 @@ struct elevator_queue
 	struct kobject kobj;
 	struct mutex sysfs_lock;
 	unsigned int registered:1;
+	unsigned int uses_mq:1;
 	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
 };
 
@@ -139,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
 extern void elv_merged_request(struct request_queue *, struct request *, int);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
 				struct bio *);
+extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);

+ 30 - 0
include/linux/sbitmap.h

@@ -258,6 +258,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 
 unsigned int sbitmap_weight(const struct sbitmap *sb);
 
+/**
+ * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
+ * @sb: Bitmap to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The format may change at any time.
+ */
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
+
+/**
+ * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
+ * seq_file.
+ * @sb: Bitmap to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The output isn't guaranteed to be internally
+ * consistent.
+ */
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
+
 /**
  * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
  * memory node.
@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
  */
 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
 
+/**
+ * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
+ * seq_file.
+ * @sbq: Bitmap queue to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The format may change at any time.
+ */
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
+
 #endif /* __LINUX_SCALE_BITMAP_H */

+ 131 - 8
lib/sbitmap.c

@@ -17,6 +17,7 @@
 
 #include <linux/random.h>
 #include <linux/sbitmap.h>
+#include <linux/seq_file.h>
 
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      gfp_t flags, int node)
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb)
 }
 EXPORT_SYMBOL_GPL(sbitmap_weight);
 
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+	seq_printf(m, "depth=%u\n", sb->depth);
+	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
+	seq_printf(m, "map_nr=%u\n", sb->map_nr);
+}
+EXPORT_SYMBOL_GPL(sbitmap_show);
+
+static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
+{
+	if ((offset & 0xf) == 0) {
+		if (offset != 0)
+			seq_putc(m, '\n');
+		seq_printf(m, "%08x:", offset);
+	}
+	if ((offset & 0x1) == 0)
+		seq_putc(m, ' ');
+	seq_printf(m, "%02x", byte);
+}
+
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+	u8 byte = 0;
+	unsigned int byte_bits = 0;
+	unsigned int offset = 0;
+	int i;
+
+	for (i = 0; i < sb->map_nr; i++) {
+		unsigned long word = READ_ONCE(sb->map[i].word);
+		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+
+		while (word_bits > 0) {
+			unsigned int bits = min(8 - byte_bits, word_bits);
+
+			byte |= (word & (BIT(bits) - 1)) << byte_bits;
+			byte_bits += bits;
+			if (byte_bits == 8) {
+				emit_byte(m, offset, byte);
+				byte = 0;
+				byte_bits = 0;
+				offset++;
+			}
+			word >>= bits;
+			word_bits -= bits;
+		}
+	}
+	if (byte_bits) {
+		emit_byte(m, offset, byte);
+		offset++;
+	}
+	if (offset)
+		seq_putc(m, '\n');
+}
+EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
+
 static unsigned int sbq_calc_wake_batch(unsigned int depth)
 {
 	unsigned int wake_batch;
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
 {
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	int i;
+
+	if (sbq->wake_batch != wake_batch) {
+		WRITE_ONCE(sbq->wake_batch, wake_batch);
+		/*
+		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
+		 * the batch size is updated before the wait counts.
+		 */
+		smp_mb__before_atomic();
+		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
+			atomic_set(&sbq->ws[i].wait_cnt, 1);
+	}
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 static void sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
+	unsigned int wake_batch;
 	int wait_cnt;
 
-	/* Ensure that the wait list checks occur after clear_bit(). */
-	smp_mb();
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
+	 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
+	 * the comment on waitqueue_active(). This is __after_atomic because we
+	 * just did clear_bit() in the caller.
+	 */
+	smp_mb__after_atomic();
 
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
 		return;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
-	if (unlikely(wait_cnt < 0))
-		wait_cnt = atomic_inc_return(&ws->wait_cnt);
-	if (wait_cnt == 0) {
-		atomic_add(sbq->wake_batch, &ws->wait_cnt);
+	if (wait_cnt <= 0) {
+		wake_batch = READ_ONCE(sbq->wake_batch);
+		/*
+		 * Pairs with the memory barrier in sbitmap_queue_resize() to
+		 * ensure that we see the batch size update before the wait
+		 * count is reset.
+		 */
+		smp_mb__before_atomic();
+		/*
+		 * If there are concurrent callers to sbq_wake_up(), the last
+		 * one to decrement the wait count below zero will bump it back
+		 * up. If there is a concurrent resize, the count reset will
+		 * either cause the cmpxchg to fail or overwrite after the
+		 * cmpxchg.
+		 */
+		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
 		sbq_index_atomic_inc(&sbq->wake_index);
 		wake_up(&ws->wait);
 	}
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 	int i, wake_index;
 
 	/*
-	 * Make sure all changes prior to this are visible from other CPUs.
+	 * Pairs with the memory barrier in set_current_state() like in
+	 * sbq_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 	}
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
+
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
+{
+	bool first;
+	int i;
+
+	sbitmap_show(&sbq->sb, m);
+
+	seq_puts(m, "alloc_hint={");
+	first = true;
+	for_each_possible_cpu(i) {
+		if (!first)
+			seq_puts(m, ", ");
+		first = false;
+		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
+	}
+	seq_puts(m, "}\n");
+
+	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
+	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+
+	seq_puts(m, "ws={\n");
+	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+		struct sbq_wait_state *ws = &sbq->ws[i];
+
+		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
+			   atomic_read(&ws->wait_cnt),
+			   waitqueue_active(&ws->wait) ? "active" : "inactive");
+	}
+	seq_puts(m, "}\n");
+
+	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_show);
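
sbitmap_bitmap_show() above repacks words of varying depth into a byte stream before hex-dumping them 16 per line. A user-space version of the same packing loop; seq_file is replaced by stdio and the types are simplified, so this is a sketch of the algorithm rather than the kernel function:

/*
 * User-space version of the bit-repacking loop in
 * sbitmap_bitmap_show() above: words of varying depth are packed
 * into bytes and hex-dumped 16 per line.
 */
#include <stdio.h>

static void emit_byte(unsigned int offset, unsigned char byte)
{
	if ((offset & 0xf) == 0) {
		if (offset != 0)
			putchar('\n');
		printf("%08x:", offset);
	}
	if ((offset & 0x1) == 0)
		putchar(' ');
	printf("%02x", byte);
}

static void bitmap_show(const unsigned long *map, const unsigned int *depth,
			int map_nr)
{
	unsigned char byte = 0;
	unsigned int byte_bits = 0, offset = 0;

	for (int i = 0; i < map_nr; i++) {
		unsigned long word = map[i];
		unsigned int word_bits = depth[i];

		while (word_bits > 0) {
			/* take at most the 8 - byte_bits bits that fit */
			unsigned int bits = 8 - byte_bits;

			if (bits > word_bits)
				bits = word_bits;
			byte |= (word & ((1UL << bits) - 1)) << byte_bits;
			byte_bits += bits;
			if (byte_bits == 8) {
				emit_byte(offset++, byte);
				byte = 0;
				byte_bits = 0;
			}
			word >>= bits;
			word_bits -= bits;
		}
	}
	if (byte_bits)
		emit_byte(offset++, byte);
	if (offset)
		putchar('\n');
}

int main(void)
{
	unsigned long map[] = { 0xffUL, 0x1UL };
	unsigned int depth[] = { 12, 4 };

	bitmap_show(map, depth, 2);	/* packs 16 bits into two bytes */
	return 0;
}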