8 жил өмнө · 78d91a75b4
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq)
 
				 
			
 
				 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
			
 
				 {
			
 
				-	struct elevator_queue *e = hctx->queue->elevator;
			
 
				+	struct request_queue *q = hctx->queue;
			
 
				+	struct elevator_queue *e = q->elevator;
			
 
				 	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
			
 
				 	bool did_work = false;
			
 
				 	LIST_HEAD(rq_list);
			
@@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 
				 	 */
			
 
				 	if (!list_empty(&rq_list)) {
			
 
				 		blk_mq_sched_mark_restart_hctx(hctx);
			
 
				-		did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
			
 
				+		did_work = blk_mq_dispatch_rq_list(q, &rq_list);
			
 
				 	} else if (!has_sched_dispatch) {
			
 
				 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
			
 
				-		blk_mq_dispatch_rq_list(hctx, &rq_list);
			
 
				+		blk_mq_dispatch_rq_list(q, &rq_list);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 
				 			if (!rq)
			
 
				 				break;
			
 
				 			list_add(&rq->queuelist, &rq_list);
			
 
				-		} while (blk_mq_dispatch_rq_list(hctx, &rq_list));
			
 
				+		} while (blk_mq_dispatch_rq_list(q, &rq_list));
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -317,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
			
 
				+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
			
 
				 {
			
 
				 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
			
 
				 		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
			
 
				-		if (blk_mq_hctx_has_pending(hctx))
			
 
				+		if (blk_mq_hctx_has_pending(hctx)) {
			
 
				 			blk_mq_run_hw_queue(hctx, true);
			
 
				+			return true;
			
 
				+		}
			
 
				 	}
			
 
				+	return false;
			
 
				 }
			
 
				 
			
 
				-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
			
 
				-{
			
 
				-	struct request_queue *q = hctx->queue;
			
 
				-	unsigned int i;
			
 
				+/**
			
 
				+ * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
			
 
				+ * @pos:    loop cursor.
			
 
				+ * @skip:   the list element that will not be examined. Iteration starts at
			
 
				+ *          @skip->next.
			
 
				+ * @head:   head of the list to examine. This list must have at least one
			
 
				+ *          element, namely @skip.
			
 
				+ * @member: name of the list_head structure within typeof(*pos).
			
 
				+ */
			
 
				+#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
			
 
				+	for ((pos) = (skip);						\
			
 
				+	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
			
 
				+			(pos)->member.next, typeof(*pos), member) :	\
			
 
				+	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
			
 
				+	     (pos) != (skip); )
			
 
				 
			
 
				-	if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
			
 
				-		if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
			
 
				-			queue_for_each_hw_ctx(q, hctx, i)
			
 
				-				blk_mq_sched_restart_hctx(hctx);
			
 
				+/*
			
 
				+ * Called after a driver tag has been freed to check whether a hctx needs to
			
 
				+ * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
			
 
				+ * queues in a round-robin fashion if the tag set of @hctx is shared with other
			
 
				+ * hardware queues.
			
 
				+ */
			
 
				+void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
			
 
				+{
			
 
				+	struct blk_mq_tags *const tags = hctx->tags;
			
 
				+	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
			
 
				+	struct request_queue *const queue = hctx->queue, *q;
			
 
				+	struct blk_mq_hw_ctx *hctx2;
			
 
				+	unsigned int i, j;
			
 
				+
			
 
				+	if (set->flags & BLK_MQ_F_TAG_SHARED) {
			
 
				+		rcu_read_lock();
			
 
				+		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
			
 
				+					   tag_set_list) {
			
 
				+			queue_for_each_hw_ctx(q, hctx2, i)
			
 
				+				if (hctx2->tags == tags &&
			
 
				+				    blk_mq_sched_restart_hctx(hctx2))
			
 
				+					goto done;
			
 
				+		}
			
 
				+		j = hctx->queue_num + 1;
			
 
				+		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
			
 
				+			if (j == queue->nr_hw_queues)
			
 
				+				j = 0;
			
 
				+			hctx2 = queue->queue_hw_ctx[j];
			
 
				+			if (hctx2->tags == tags &&
			
 
				+			    blk_mq_sched_restart_hctx(hctx2))
			
 
				+				break;
			
 
				 		}
			
 
				+done:
			
 
				+		rcu_read_unlock();
			
 
				 	} else {
			
 
				 		blk_mq_sched_restart_hctx(hctx);
			
 
				 	}
			
@@ -431,11 +475,67 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-int blk_mq_sched_setup(struct request_queue *q)
			
 
				+static int blk_mq_sched_alloc_tags(struct request_queue *q,
			
 
				+				   struct blk_mq_hw_ctx *hctx,
			
 
				+				   unsigned int hctx_idx)
			
 
				+{
			
 
				+	struct blk_mq_tag_set *set = q->tag_set;
			
 
				+	int ret;
			
 
				+
			
 
				+	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
			
 
				+					       set->reserved_tags);
			
 
				+	if (!hctx->sched_tags)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
			
 
				+	if (ret)
			
 
				+		blk_mq_sched_free_tags(set, hctx, hctx_idx);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static void blk_mq_sched_tags_teardown(struct request_queue *q)
			
 
				 {
			
 
				 	struct blk_mq_tag_set *set = q->tag_set;
			
 
				 	struct blk_mq_hw_ctx *hctx;
			
 
				-	int ret, i;
			
 
				+	int i;
			
 
				+
			
 
				+	queue_for_each_hw_ctx(q, hctx, i)
			
 
				+		blk_mq_sched_free_tags(set, hctx, i);
			
 
				+}
			
 
				+
			
 
				+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			
 
				+			   unsigned int hctx_idx)
			
 
				+{
			
 
				+	struct elevator_queue *e = q->elevator;
			
 
				+
			
 
				+	if (!e)
			
 
				+		return 0;
			
 
				+
			
 
				+	return blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
			
 
				+}
			
 
				+
			
 
				+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			
 
				+			    unsigned int hctx_idx)
			
 
				+{
			
 
				+	struct elevator_queue *e = q->elevator;
			
 
				+
			
 
				+	if (!e)
			
 
				+		return;
			
 
				+
			
 
				+	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			
 
				+}
			
 
				+
			
 
				+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
			
 
				+{
			
 
				+	struct blk_mq_hw_ctx *hctx;
			
 
				+	unsigned int i;
			
 
				+	int ret;
			
 
				+
			
 
				+	if (!e) {
			
 
				+		q->elevator = NULL;
			
 
				+		return 0;
			
 
				+	}
			
 
				 
			
 
				 	/*
			
 
				 	 * Default to 256, since we don't split into sync/async like the
			
@@ -443,49 +543,30 @@ int blk_mq_sched_setup(struct request_queue *q)
 
				 	 */
			
 
				 	q->nr_requests = 2 * BLKDEV_MAX_RQ;
			
 
				 
			
 
				-	/*
			
 
				-	 * We're switching to using an IO scheduler, so setup the hctx
			
 
				-	 * scheduler tags and switch the request map from the regular
			
 
				-	 * tags to scheduler tags. First allocate what we need, so we
			
 
				-	 * can safely fail and fallback, if needed.
			
 
				-	 */
			
 
				-	ret = 0;
			
 
				 	queue_for_each_hw_ctx(q, hctx, i) {
			
 
				-		hctx->sched_tags = blk_mq_alloc_rq_map(set, i,
			
 
				-				q->nr_requests, set->reserved_tags);
			
 
				-		if (!hctx->sched_tags) {
			
 
				-			ret = -ENOMEM;
			
 
				-			break;
			
 
				-		}
			
 
				-		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
			
 
				+		ret = blk_mq_sched_alloc_tags(q, hctx, i);
			
 
				 		if (ret)
			
 
				-			break;
			
 
				+			goto err;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * If we failed, free what we did allocate
			
 
				-	 */
			
 
				-	if (ret) {
			
 
				-		queue_for_each_hw_ctx(q, hctx, i) {
			
 
				-			if (!hctx->sched_tags)
			
 
				-				continue;
			
 
				-			blk_mq_sched_free_tags(set, hctx, i);
			
 
				-		}
			
 
				-
			
 
				-		return ret;
			
 
				-	}
			
 
				+	ret = e->ops.mq.init_sched(q, e);
			
 
				+	if (ret)
			
 
				+		goto err;
			
 
				 
			
 
				 	return 0;
			
 
				+
			
 
				+err:
			
 
				+	blk_mq_sched_tags_teardown(q);
			
 
				+	q->elevator = NULL;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				-void blk_mq_sched_teardown(struct request_queue *q)
			
 
				+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
			
 
				 {
			
 
				-	struct blk_mq_tag_set *set = q->tag_set;
			
 
				-	struct blk_mq_hw_ctx *hctx;
			
 
				-	int i;
			
 
				-
			
 
				-	queue_for_each_hw_ctx(q, hctx, i)
			
 
				-		blk_mq_sched_free_tags(set, hctx, i);
			
 
				+	if (e->type->ops.mq.exit_sched)
			
 
				+		e->type->ops.mq.exit_sched(e);
			
 
				+	blk_mq_sched_tags_teardown(q);
			
 
				+	q->elevator = NULL;
			
 
				 }
			
 
				 
			
 
				 int blk_mq_sched_init(struct request_queue *q)
			
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 
				 				struct request **merged_request);
			
 
				 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
			
 
				 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
			
 
				-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
			
 
				+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
			
 
				 
			
 
				 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
			
 
				 				 bool run_queue, bool async, bool can_block);
			
@@ -32,8 +32,13 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 
				 			struct list_head *rq_list,
			
 
				 			struct request *(*get_rq)(struct blk_mq_hw_ctx *));
			
 
				 
			
 
				-int blk_mq_sched_setup(struct request_queue *q);
			
 
				-void blk_mq_sched_teardown(struct request_queue *q);
			
 
				+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
			
 
				+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
			
 
				+
			
 
				+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			
 
				+			   unsigned int hctx_idx);
			
 
				+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			
 
				+			    unsigned int hctx_idx);
			
 
				 
			
 
				 int blk_mq_sched_init(struct request_queue *q);
			
 
				 
			
@@ -131,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 
				 		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Mark a hardware queue and the request queue it belongs to as needing a
			
 
				- * restart.
			
 
				- */
			
 
				-static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
			
 
				-{
			
 
				-	struct request_queue *q = hctx->queue;
			
 
				-
			
 
				-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			
 
				-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
			
 
				-	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
			
 
				-		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
			
 
				-}
			
 
				-
			
 
				 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
			
 
				 {
			
 
				 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
			
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 
				 
			
 
				 	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
			
 
				 
			
 
				-	blk_mq_put_ctx(alloc_data.ctx);
			
 
				 	blk_queue_exit(q);
			
 
				 
			
 
				 	if (!rq)
			
@@ -349,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
				 		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
			
 
				 	if (sched_tag != -1)
			
 
				 		blk_mq_sched_completed_request(hctx, rq);
			
 
				-	blk_mq_sched_restart_queues(hctx);
			
 
				+	blk_mq_sched_restart(hctx);
			
 
				 	blk_queue_exit(q);
			
 
				 }
			
 
				 
			
@@ -846,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 
				 		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
			
 
				 	};
			
 
				 
			
 
				-	if (rq->tag != -1) {
			
 
				-done:
			
 
				-		if (hctx)
			
 
				-			*hctx = data.hctx;
			
 
				-		return true;
			
 
				-	}
			
 
				+	if (rq->tag != -1)
			
 
				+		goto done;
			
 
				 
			
 
				 	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
			
 
				 		data.flags |= BLK_MQ_REQ_RESERVED;
			
@@ -863,10 +858,12 @@ done:
 
				 			atomic_inc(&data.hctx->nr_active);
			
 
				 		}
			
 
				 		data.hctx->tags->rqs[rq->tag] = rq;
			
 
				-		goto done;
			
 
				 	}
			
 
				 
			
 
				-	return false;
			
 
				+done:
			
 
				+	if (hctx)
			
 
				+		*hctx = data.hctx;
			
 
				+	return rq->tag != -1;
			
 
				 }
			
 
				 
			
 
				 static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
			
@@ -963,14 +960,17 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
			
 
				+bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
			
 
				 {
			
 
				-	struct request_queue *q = hctx->queue;
			
 
				+	struct blk_mq_hw_ctx *hctx;
			
 
				 	struct request *rq;
			
 
				 	LIST_HEAD(driver_list);
			
 
				 	struct list_head *dptr;
			
 
				 	int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
			
 
				 
			
 
				+	if (list_empty(list))
			
 
				+		return false;
			
 
				+
			
 
				 	/*
			
 
				 	 * Start off with dptr being NULL, so we start the first request
			
 
				 	 * immediately, even if we have more pending.
			
@@ -981,7 +981,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 
				 	 * Now process all the entries, sending them to the driver.
			
 
				 	 */
			
 
				 	errors = queued = 0;
			
 
				-	while (!list_empty(list)) {
			
 
				+	do {
			
 
				 		struct blk_mq_queue_data bd;
			
 
				 
			
 
				 		rq = list_first_entry(list, struct request, queuelist);
			
@@ -1052,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 
				 		 */
			
 
				 		if (!dptr && list->next != list->prev)
			
 
				 			dptr = &driver_list;
			
 
				-	}
			
 
				+	} while (!list_empty(list));
			
 
				 
			
 
				 	hctx->dispatched[queued_to_index(queued)]++;
			
 
				 
			
@@ -1135,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 
				 	return hctx->next_cpu;
			
 
				 }
			
 
				 
			
 
				-void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
			
 
				+static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
			
 
				+					unsigned long msecs)
			
 
				 {
			
 
				 	if (unlikely(blk_mq_hctx_stopped(hctx) ||
			
 
				 		     !blk_mq_hw_queue_mapped(hctx)))
			
@@ -1152,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 
				 		put_cpu();
			
 
				 	}
			
 
				 
			
 
				-	kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
			
 
				+	if (msecs == 0)
			
 
				+		kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx),
			
 
				+					 &hctx->run_work);
			
 
				+	else
			
 
				+		kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
			
 
				+						 &hctx->delayed_run_work,
			
 
				+						 msecs_to_jiffies(msecs));
			
 
				+}
			
 
				+
			
 
				+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
			
 
				+{
			
 
				+	__blk_mq_delay_run_hw_queue(hctx, true, msecs);
			
 
				+}
			
 
				+EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
			
 
				+
			
 
				+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
			
 
				+{
			
 
				+	__blk_mq_delay_run_hw_queue(hctx, async, 0);
			
 
				 }
			
 
				 
			
 
				 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
			
@@ -1255,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 
				 	__blk_mq_run_hw_queue(hctx);
			
 
				 }
			
 
				 
			
 
				+static void blk_mq_delayed_run_work_fn(struct work_struct *work)
			
 
				+{
			
 
				+	struct blk_mq_hw_ctx *hctx;
			
 
				+
			
 
				+	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work);
			
 
				+
			
 
				+	__blk_mq_run_hw_queue(hctx);
			
 
				+}
			
 
				+
			
 
				 static void blk_mq_delay_work_fn(struct work_struct *work)
			
 
				 {
			
 
				 	struct blk_mq_hw_ctx *hctx;
			
@@ -1924,6 +1951,8 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 
				 				       hctx->fq->flush_rq, hctx_idx,
			
 
				 				       flush_start_tag + hctx_idx);
			
 
				 
			
 
				+	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
			
 
				+
			
 
				 	if (set->ops->exit_hctx)
			
 
				 		set->ops->exit_hctx(hctx, hctx_idx);
			
 
				 
			
@@ -1960,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
				 		node = hctx->numa_node = set->numa_node;
			
 
				 
			
 
				 	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
			
 
				+	INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn);
			
 
				 	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
			
 
				 	spin_lock_init(&hctx->lock);
			
 
				 	INIT_LIST_HEAD(&hctx->dispatch);
			
@@ -1990,9 +2020,12 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
				 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
			
 
				 		goto free_bitmap;
			
 
				 
			
 
				+	if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
			
 
				+		goto exit_hctx;
			
 
				+
			
 
				 	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
			
 
				 	if (!hctx->fq)
			
 
				-		goto exit_hctx;
			
 
				+		goto sched_exit_hctx;
			
 
				 
			
 
				 	if (set->ops->init_request &&
			
 
				 	    set->ops->init_request(set->driver_data,
			
@@ -2007,6 +2040,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
				 
			
 
				  free_fq:
			
 
				 	kfree(hctx->fq);
			
 
				+ sched_exit_hctx:
			
 
				+	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
			
 
				  exit_hctx:
			
 
				 	if (set->ops->exit_hctx)
			
 
				 		set->ops->exit_hctx(hctx, hctx_idx);
			
@@ -2233,8 +2268,6 @@ void blk_mq_release(struct request_queue *q)
 
				 	struct blk_mq_hw_ctx *hctx;
			
 
				 	unsigned int i;
			
 
				 
			
 
				-	blk_mq_sched_teardown(q);
			
 
				-
			
 
				 	/* hctx kobj stays in hctx */
			
 
				 	queue_for_each_hw_ctx(q, hctx, i) {
			
 
				 		if (!hctx)
			
@@ -2565,6 +2598,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
			
 
				+{
			
 
				+	if (set->ops->map_queues)
			
 
				+		return set->ops->map_queues(set);
			
 
				+	else
			
 
				+		return blk_mq_map_queues(set);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Alloc a tag set to be associated with one or more request queues.
			
 
				  * May fail with EINVAL for various error conditions. May adjust the
			
@@ -2619,10 +2660,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 
				 	if (!set->mq_map)
			
 
				 		goto out_free_tags;
			
 
				 
			
 
				-	if (set->ops->map_queues)
			
 
				-		ret = set->ops->map_queues(set);
			
 
				-	else
			
 
				-		ret = blk_mq_map_queues(set);
			
 
				+	ret = blk_mq_update_queue_map(set);
			
 
				 	if (ret)
			
 
				 		goto out_free_mq_map;
			
 
				 
			
@@ -2714,6 +2752,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 
				 		blk_mq_freeze_queue(q);
			
 
				 
			
 
				 	set->nr_hw_queues = nr_hw_queues;
			
 
				+	blk_mq_update_queue_map(set);
			
 
				 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
			
 
				 		blk_mq_realloc_hw_ctxs(set, q);
			
 
				 
			
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q);
 
				 void blk_mq_free_queue(struct request_queue *q);
			
 
				 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
			
 
				 void blk_mq_wake_waiters(struct request_queue *q);
			
 
				-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
			
 
				+bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
			
 
				 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
			
 
				 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
			
 
				 bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
			
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj)
 
				 
			
 
				 	if (q->elevator) {
			
 
				 		ioc_clear_queue(q);
			
 
				-		elevator_exit(q->elevator);
			
 
				+		elevator_exit(q, q->elevator);
			
 
				 	}
			
 
				 
			
 
				 	blk_exit_rl(&q->root_rl);
			
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -242,26 +242,21 @@ int elevator_init(struct request_queue *q, char *name)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (e->uses_mq) {
			
 
				-		err = blk_mq_sched_setup(q);
			
 
				-		if (!err)
			
 
				-			err = e->ops.mq.init_sched(q, e);
			
 
				-	} else
			
 
				+	if (e->uses_mq)
			
 
				+		err = blk_mq_init_sched(q, e);
			
 
				+	else
			
 
				 		err = e->ops.sq.elevator_init_fn(q, e);
			
 
				-	if (err) {
			
 
				-		if (e->uses_mq)
			
 
				-			blk_mq_sched_teardown(q);
			
 
				+	if (err)
			
 
				 		elevator_put(e);
			
 
				-	}
			
 
				 	return err;
			
 
				 }
			
 
				 EXPORT_SYMBOL(elevator_init);
			
 
				 
			
 
				-void elevator_exit(struct elevator_queue *e)
			
 
				+void elevator_exit(struct request_queue *q, struct elevator_queue *e)
			
 
				 {
			
 
				 	mutex_lock(&e->sysfs_lock);
			
 
				 	if (e->uses_mq && e->type->ops.mq.exit_sched)
			
 
				-		e->type->ops.mq.exit_sched(e);
			
 
				+		blk_mq_exit_sched(q, e);
			
 
				 	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
			
 
				 		e->type->ops.sq.elevator_exit_fn(e);
			
 
				 	mutex_unlock(&e->sysfs_lock);
			
@@ -946,6 +941,45 @@ void elv_unregister(struct elevator_type *e)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(elv_unregister);
			
 
				 
			
 
				+static int elevator_switch_mq(struct request_queue *q,
			
 
				+			      struct elevator_type *new_e)
			
 
				+{
			
 
				+	int ret;
			
 
				+
			
 
				+	blk_mq_freeze_queue(q);
			
 
				+	blk_mq_quiesce_queue(q);
			
 
				+
			
 
				+	if (q->elevator) {
			
 
				+		if (q->elevator->registered)
			
 
				+			elv_unregister_queue(q);
			
 
				+		ioc_clear_queue(q);
			
 
				+		elevator_exit(q, q->elevator);
			
 
				+	}
			
 
				+
			
 
				+	ret = blk_mq_init_sched(q, new_e);
			
 
				+	if (ret)
			
 
				+		goto out;
			
 
				+
			
 
				+	if (new_e) {
			
 
				+		ret = elv_register_queue(q);
			
 
				+		if (ret) {
			
 
				+			elevator_exit(q, q->elevator);
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (new_e)
			
 
				+		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
			
 
				+	else
			
 
				+		blk_add_trace_msg(q, "elv switch: none");
			
 
				+
			
 
				+out:
			
 
				+	blk_mq_unfreeze_queue(q);
			
 
				+	blk_mq_start_stopped_hw_queues(q, true);
			
 
				+	return ret;
			
 
				+
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * switch to new_e io scheduler. be careful not to introduce deadlocks -
			
 
				  * we don't free the old io scheduler, before we have allocated what we
			
@@ -958,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
				 	bool old_registered = false;
			
 
				 	int err;
			
 
				 
			
 
				-	if (q->mq_ops) {
			
 
				-		blk_mq_freeze_queue(q);
			
 
				-		blk_mq_quiesce_queue(q);
			
 
				-	}
			
 
				+	if (q->mq_ops)
			
 
				+		return elevator_switch_mq(q, new_e);
			
 
				 
			
 
				 	/*
			
 
				 	 * Turn on BYPASS and drain all requests w/ elevator private data.
			
@@ -973,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
				 	if (old) {
			
 
				 		old_registered = old->registered;
			
 
				 
			
 
				-		if (old->uses_mq)
			
 
				-			blk_mq_sched_teardown(q);
			
 
				-
			
 
				-		if (!q->mq_ops)
			
 
				-			blk_queue_bypass_start(q);
			
 
				+		blk_queue_bypass_start(q);
			
 
				 
			
 
				 		/* unregister and clear all auxiliary data of the old elevator */
			
 
				 		if (old_registered)
			
@@ -987,56 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
				 	}
			
 
				 
			
 
				 	/* allocate, init and register new elevator */
			
 
				-	if (new_e) {
			
 
				-		if (new_e->uses_mq) {
			
 
				-			err = blk_mq_sched_setup(q);
			
 
				-			if (!err)
			
 
				-				err = new_e->ops.mq.init_sched(q, new_e);
			
 
				-		} else
			
 
				-			err = new_e->ops.sq.elevator_init_fn(q, new_e);
			
 
				-		if (err)
			
 
				-			goto fail_init;
			
 
				+	err = new_e->ops.sq.elevator_init_fn(q, new_e);
			
 
				+	if (err)
			
 
				+		goto fail_init;
			
 
				 
			
 
				-		err = elv_register_queue(q);
			
 
				-		if (err)
			
 
				-			goto fail_register;
			
 
				-	} else
			
 
				-		q->elevator = NULL;
			
 
				+	err = elv_register_queue(q);
			
 
				+	if (err)
			
 
				+		goto fail_register;
			
 
				 
			
 
				 	/* done, kill the old one and finish */
			
 
				 	if (old) {
			
 
				-		elevator_exit(old);
			
 
				-		if (!q->mq_ops)
			
 
				-			blk_queue_bypass_end(q);
			
 
				+		elevator_exit(q, old);
			
 
				+		blk_queue_bypass_end(q);
			
 
				 	}
			
 
				 
			
 
				-	if (q->mq_ops) {
			
 
				-		blk_mq_unfreeze_queue(q);
			
 
				-		blk_mq_start_stopped_hw_queues(q, true);
			
 
				-	}
			
 
				-
			
 
				-	if (new_e)
			
 
				-		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
			
 
				-	else
			
 
				-		blk_add_trace_msg(q, "elv switch: none");
			
 
				+	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
			
 
				 
			
 
				 	return 0;
			
 
				 
			
 
				 fail_register:
			
 
				-	if (q->mq_ops)
			
 
				-		blk_mq_sched_teardown(q);
			
 
				-	elevator_exit(q->elevator);
			
 
				+	elevator_exit(q, q->elevator);
			
 
				 fail_init:
			
 
				 	/* switch failed, restore and re-register old elevator */
			
 
				 	if (old) {
			
 
				 		q->elevator = old;
			
 
				 		elv_register_queue(q);
			
 
				-		if (!q->mq_ops)
			
 
				-			blk_queue_bypass_end(q);
			
 
				-	}
			
 
				-	if (q->mq_ops) {
			
 
				-		blk_mq_unfreeze_queue(q);
			
 
				-		blk_mq_start_stopped_hw_queues(q, true);
			
 
				+		blk_queue_bypass_end(q);
			
 
				 	}
			
 
				 
			
 
				 	return err;
			
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -755,6 +755,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
				 		/* Undo dm_start_request() before requeuing */
			
 
				 		rq_end_stats(md, rq);
			
 
				 		rq_completed(md, rq_data_dir(rq), false);
			
 
				+		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
			
 
				 		return BLK_MQ_RQ_QUEUE_BUSY;
			
 
				 	}
			
 
				 
			
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -270,7 +270,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
				 	memset(cmnd, 0, sizeof(*cmnd));
			
 
				 	cmnd->dsm.opcode = nvme_cmd_dsm;
			
 
				 	cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
			
 
				-	cmnd->dsm.nr = segments - 1;
			
 
				+	cmnd->dsm.nr = cpu_to_le32(segments - 1);
			
 
				 	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
			
 
				 
			
 
				 	req->special_vec.bv_page = virt_to_page(range);
			
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
 
				 	u16 status;
			
 
				 
			
 
				 	WARN_ON(req == NULL || slog == NULL);
			
 
				-	if (req->cmd->get_log_page.nsid == 0xFFFFFFFF)
			
 
				+	if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
			
 
				 		status = nvmet_get_smart_log_all(req, slog);
			
 
				 	else
			
 
				 		status = nvmet_get_smart_log_nsid(req, slog);
			
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -180,7 +180,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
 
				 
			
 
				 	sector = le64_to_cpu(write_zeroes->slba) <<
			
 
				 		(req->ns->blksize_shift - 9);
			
 
				-	nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) <<
			
 
				+	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) <<
			
 
				 		(req->ns->blksize_shift - 9)) + 1;
			
 
				 
			
 
				 	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			
@@ -230,7 +230,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req)
 
				 		return 0;
			
 
				 	case nvme_cmd_dsm:
			
 
				 		req->execute = nvmet_execute_dsm;
			
 
				-		req->data_len = le32_to_cpu(cmd->dsm.nr + 1) *
			
 
				+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
			
 
				 			sizeof(struct nvme_dsm_range);
			
 
				 		return 0;
			
 
				 	case nvme_cmd_write_zeroes:
			
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -496,7 +496,7 @@ static void scsi_run_queue(struct request_queue *q)
 
				 		scsi_starved_list_run(sdev->host);
			
 
				 
			
 
				 	if (q->mq_ops)
			
 
				-		blk_mq_start_stopped_hw_queues(q, false);
			
 
				+		blk_mq_run_hw_queues(q, false);
			
 
				 	else
			
 
				 		blk_run_queue(q);
			
 
				 }
			
@@ -667,7 +667,7 @@ static bool scsi_end_request(struct request *req, int error,
 
				 		    !list_empty(&sdev->host->starved_list))
			
 
				 			kblockd_schedule_work(&sdev->requeue_work);
			
 
				 		else
			
 
				-			blk_mq_start_stopped_hw_queues(q, true);
			
 
				+			blk_mq_run_hw_queues(q, true);
			
 
				 	} else {
			
 
				 		unsigned long flags;
			
 
				 
			
@@ -1974,7 +1974,7 @@ out:
 
				 	case BLK_MQ_RQ_QUEUE_BUSY:
			
 
				 		if (atomic_read(&sdev->device_busy) == 0 &&
			
 
				 		    !scsi_device_blocked(sdev))
			
 
				-			blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY);
			
 
				+			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
			
 
				 		break;
			
 
				 	case BLK_MQ_RQ_QUEUE_ERROR:
			
 
				 		/*
			
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,6 +51,7 @@ struct blk_mq_hw_ctx {
 
				 
			
 
				 	atomic_t		nr_active;
			
 
				 
			
 
				+	struct delayed_work	delayed_run_work;
			
 
				 	struct delayed_work	delay_work;
			
 
				 
			
 
				 	struct hlist_node	cpuhp_dead;
			
@@ -238,6 +239,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 
				 void blk_mq_start_hw_queues(struct request_queue *q);
			
 
				 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
			
 
				 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
			
 
				+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
			
 
				 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
			
 
				 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
			
 
				 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
			
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -610,7 +610,6 @@ struct request_queue {
 
				 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
			
 
				 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
			
 
				 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
			
 
				-#define QUEUE_FLAG_RESTART     28	/* queue needs restart at completion */
			
 
				 
			
 
				 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
			
 
				 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
			
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -211,7 +211,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
 
				 extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
			
 
				 
			
 
				 extern int elevator_init(struct request_queue *, char *);
			
 
				-extern void elevator_exit(struct elevator_queue *);
			
 
				+extern void elevator_exit(struct request_queue *, struct elevator_queue *);
			
 
				 extern int elevator_change(struct request_queue *, const char *);
			
 
				 extern bool elv_bio_merge_ok(struct request *, struct bio *);
			
 
				 extern struct elevator_queue *elevator_alloc(struct request_queue *,
			
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -64,26 +64,26 @@ enum {
 
				  * RDMA_QPTYPE field
			
 
				  */
			
 
				 enum {
			
 
				-	NVMF_RDMA_QPTYPE_CONNECTED	= 0, /* Reliable Connected */
			
 
				-	NVMF_RDMA_QPTYPE_DATAGRAM	= 1, /* Reliable Datagram */
			
 
				+	NVMF_RDMA_QPTYPE_CONNECTED	= 1, /* Reliable Connected */
			
 
				+	NVMF_RDMA_QPTYPE_DATAGRAM	= 2, /* Reliable Datagram */
			
 
				 };
			
 
				 
			
 
				 /* RDMA Provider Type codes for Discovery Log Page entry TSAS
			
 
				  * RDMA_PRTYPE field
			
 
				  */
			
 
				 enum {
			
 
				-	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 0, /* No Provider Specified */
			
 
				-	NVMF_RDMA_PRTYPE_IB		= 1, /* InfiniBand */
			
 
				-	NVMF_RDMA_PRTYPE_ROCE		= 2, /* InfiniBand RoCE */
			
 
				-	NVMF_RDMA_PRTYPE_ROCEV2		= 3, /* InfiniBand RoCEV2 */
			
 
				-	NVMF_RDMA_PRTYPE_IWARP		= 4, /* IWARP */
			
 
				+	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 1, /* No Provider Specified */
			
 
				+	NVMF_RDMA_PRTYPE_IB		= 2, /* InfiniBand */
			
 
				+	NVMF_RDMA_PRTYPE_ROCE		= 3, /* InfiniBand RoCE */
			
 
				+	NVMF_RDMA_PRTYPE_ROCEV2		= 4, /* InfiniBand RoCEV2 */
			
 
				+	NVMF_RDMA_PRTYPE_IWARP		= 5, /* IWARP */
			
 
				 };
			
 
				 
			
 
				 /* RDMA Connection Management Service Type codes for Discovery Log Page
			
 
				  * entry TSAS RDMA_CMS field
			
 
				  */
			
 
				 enum {
			
 
				-	NVMF_RDMA_CMS_RDMA_CM	= 0, /* Sockets based enpoint addressing */
			
 
				+	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
			
 
				 };
			
 
				 
			
 
				 #define NVMF_AQ_DEPTH		32