
Merge tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - clean up how we pass around gfp_t and
   blk_mq_req_flags_t (Christoph)

 - prepare us to defer scheduler attach (Christoph)

 - clean up drivers handling of bounce buffers (Christoph)

 - fix timeout handling corner cases (Christoph/Bart/Keith)

 - bcache fixes (Coly)

 - prep work for bcachefs and some block layer optimizations (Kent).

 - convert users of bio_sets to using embedded structs (Kent).

 - fixes for the BFQ io scheduler (Paolo/Davide/Filippo)

 - lightnvm fixes and improvements (Matias, with contributions from Hans
   and Javier)

 - adding discard throttling to blk-wbt (me)

 - sbitmap and blk-mq-tag handling improvements (me/Omar/Ming).

 - remove the sparc jsflash block driver, acked by DaveM.

 - Kyber scheduler improvement from Jianchao, making it more friendly
   wrt merging.

 - conversion of symbolic proc permissions to octal, from Joe Perches.
   Previously the block parts were a mix of both.

 - nbd fixes (Josef and Kevin Vigor)

 - unify how we handle the various kinds of timestamps that the block
   core and utility code use (Omar)

 - three NVMe pull requests from Keith and Christoph, bringing AEN to
   feature completeness, file backed namespaces, cq/sq lock split, and
   various fixes

 - various little fixes and improvements all over the map

* tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block: (196 commits)
  blk-mq: update nr_requests when switching to 'none' scheduler
  block: don't use blocking queue entered for recursive bio submits
  dm-crypt: fix warning in shutdown path
  lightnvm: pblk: take bitmap alloc. out of critical section
  lightnvm: pblk: kick writer on new flush points
  lightnvm: pblk: only try to recover lines with written smeta
  lightnvm: pblk: remove unnecessary bio_get/put
  lightnvm: pblk: add possibility to set write buffer size manually
  lightnvm: fix partial read error path
  lightnvm: proper error handling for pblk_bio_add_pages
  lightnvm: pblk: fix smeta write error path
  lightnvm: pblk: garbage collect lines with failed writes
  lightnvm: pblk: rework write error recovery path
  lightnvm: pblk: remove dead function
  lightnvm: pass flag on graceful teardown to targets
  lightnvm: pblk: check for chunk size before allocating it
  lightnvm: pblk: remove unnecessary argument
  lightnvm: pblk: remove unnecessary indirection
  lightnvm: pblk: return NVM_ error on failed submission
  lightnvm: pblk: warn in case of corrupted write buffer
  ...
Linus Torvalds, 7 years ago
commit f459c34538
100 changed files with 2192 additions and 2143 deletions
  1. Documentation/block/null_blk.txt (+6 -3)
  2. Documentation/scsi/scsi_eh.txt (+2 -13)
  3. MAINTAINERS (+1 -1)
  4. arch/sparc/include/uapi/asm/jsflash.h (+0 -40)
  5. block/bfq-cgroup.c (+20 -20)
  6. block/bfq-iosched.c (+237 -241)
  7. block/bfq-iosched.h (+9 -21)
  8. block/bio-integrity.c (+14 -15)
  9. block/bio.c (+109 -80)
  10. block/blk-core.c (+52 -68)
  11. block/blk-integrity.c (+6 -6)
  12. block/blk-lib.c (+9 -3)
  13. block/blk-merge.c (+19 -10)
  14. block/blk-mq-debugfs.c (+0 -1)
  15. block/blk-mq-sched.c (+25 -21)
  16. block/blk-mq-sched.h (+0 -2)
  17. block/blk-mq-sysfs.c (+3 -3)
  18. block/blk-mq-tag.c (+13 -1)
  19. block/blk-mq.c (+108 -232)
  20. block/blk-mq.h (+2 -40)
  21. block/blk-stat.c (+3 -7)
  22. block/blk-stat.h (+1 -44)
  23. block/blk-sysfs.c (+45 -35)
  24. block/blk-throttle.c (+16 -19)
  25. block/blk-timeout.c (+1 -5)
  26. block/blk-wbt.c (+80 -49)
  27. block/blk-wbt.h (+21 -34)
  28. block/blk-zoned.c (+6 -2)
  29. block/blk.h (+4 -1)
  30. block/bounce.c (+26 -26)
  31. block/bsg-lib.c (+2 -4)
  32. block/bsg.c (+7 -37)
  33. block/cfq-iosched.c (+30 -36)
  34. block/deadline-iosched.c (+1 -2)
  35. block/elevator.c (+52 -49)
  36. block/genhd.c (+17 -20)
  37. block/kyber-iosched.c (+164 -35)
  38. block/mq-deadline.c (+1 -2)
  39. block/partition-generic.c (+12 -14)
  40. block/scsi_ioctl.c (+4 -6)
  41. drivers/ata/libata-eh.c (+0 -51)
  42. drivers/block/DAC960.c (+3 -8)
  43. drivers/block/DAC960.h (+0 -1)
  44. drivers/block/aoe/aoeblk.c (+5 -6)
  45. drivers/block/aoe/aoecmd.c (+2 -1)
  46. drivers/block/brd.c (+7 -3)
  47. drivers/block/drbd/drbd_bitmap.c (+3 -2)
  48. drivers/block/drbd/drbd_debugfs.c (+10 -10)
  49. drivers/block/drbd/drbd_int.h (+5 -5)
  50. drivers/block/drbd/drbd_main.c (+26 -47)
  51. drivers/block/drbd/drbd_receiver.c (+3 -3)
  52. drivers/block/drbd/drbd_req.c (+2 -2)
  53. drivers/block/drbd/drbd_req.h (+1 -1)
  54. drivers/block/floppy.c (+1 -1)
  55. drivers/block/loop.c (+9 -8)
  56. drivers/block/loop.h (+1 -0)
  57. drivers/block/mtip32xx/mtip32xx.c (+10 -19)
  58. drivers/block/nbd.c (+52 -25)
  59. drivers/block/null_blk.c (+19 -17)
  60. drivers/block/paride/pd.c (+1 -1)
  61. drivers/block/pktcdvd.c (+30 -30)
  62. drivers/block/ps3disk.c (+0 -2)
  63. drivers/block/rbd.c (+22 -22)
  64. drivers/block/rsxx/core.c (+3 -3)
  65. drivers/block/sx8.c (+1 -1)
  66. drivers/block/virtio_blk.c (+4 -4)
  67. drivers/block/xen-blkback/blkback.c (+1 -1)
  68. drivers/block/xen-blkback/xenbus.c (+2 -2)
  69. drivers/block/xen-blkfront.c (+3 -4)
  70. drivers/cdrom/cdrom.c (+1 -1)
  71. drivers/ide/ide-atapi.c (+1 -1)
  72. drivers/ide/ide-cd.c (+1 -1)
  73. drivers/ide/ide-cd_ioctl.c (+1 -1)
  74. drivers/ide/ide-devsets.c (+1 -1)
  75. drivers/ide/ide-disk.c (+1 -1)
  76. drivers/ide/ide-ioctls.c (+2 -2)
  77. drivers/ide/ide-park.c (+2 -2)
  78. drivers/ide/ide-pm.c (+2 -3)
  79. drivers/ide/ide-tape.c (+2 -2)
  80. drivers/ide/ide-taskfile.c (+2 -2)
  81. drivers/lightnvm/core.c (+5 -5)
  82. drivers/lightnvm/pblk-cache.c (+6 -4)
  83. drivers/lightnvm/pblk-core.c (+154 -79)
  84. drivers/lightnvm/pblk-gc.c (+71 -41)
  85. drivers/lightnvm/pblk-init.c (+105 -67)
  86. drivers/lightnvm/pblk-map.c (+24 -9)
  87. drivers/lightnvm/pblk-rb.c (+4 -44)
  88. drivers/lightnvm/pblk-read.c (+88 -58)
  89. drivers/lightnvm/pblk-recovery.c (+21 -100)
  90. drivers/lightnvm/pblk-rl.c (+25 -4)
  91. drivers/lightnvm/pblk-sysfs.c (+13 -2)
  92. drivers/lightnvm/pblk-write.c (+178 -91)
  93. drivers/lightnvm/pblk.h (+36 -22)
  94. drivers/md/bcache/bcache.h (+7 -7)
  95. drivers/md/bcache/bset.c (+4 -9)
  96. drivers/md/bcache/bset.h (+1 -1)
  97. drivers/md/bcache/btree.c (+2 -2)
  98. drivers/md/bcache/io.c (+2 -2)
  99. drivers/md/bcache/request.c (+9 -9)
  100. drivers/md/bcache/super.c (+67 -42)

+ 6 - 3
Documentation/block/null_blk.txt

@@ -71,13 +71,16 @@ use_per_node_hctx=[0/1]: Default: 0
   1: The multi-queue block layer is instantiated with a hardware dispatch
      queue for each CPU node in the system.
 
-use_lightnvm=[0/1]: Default: 0
-  Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled.
-
 no_sched=[0/1]: Default: 0
   0: nullb* use default blk-mq io scheduler.
   1: nullb* doesn't use io scheduler.
 
+blocking=[0/1]: Default: 0
+  0: Register as a non-blocking blk-mq driver device.
+  1: Register as a blocking blk-mq driver device, null_blk will set
+     the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
+     needs to block in its ->queue_rq() function.
+
 shared_tags=[0/1]: Default: 0
   0: Tag set is not shared.
   1: Tag set shared between devices for blk-mq. Only makes sense with

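For context, the new blocking=1 option corresponds to a driver setting BLK_MQ_F_BLOCKING on its tag set. A minimal sketch of how a blk-mq driver would opt in (my_tag_set, my_mq_ops and my_driver_setup_tags are placeholder names, not code from this series); null_blk does essentially the same thing, turning the module parameter into this flag before allocating its tag set:

#include <linux/blk-mq.h>

/* Placeholder ops table; a real driver must at least provide ->queue_rq(). */
static const struct blk_mq_ops my_mq_ops;

static struct blk_mq_tag_set my_tag_set;

static int my_driver_setup_tags(void)
{
	my_tag_set.ops		= &my_mq_ops;
	my_tag_set.nr_hw_queues	= 1;
	my_tag_set.queue_depth	= 64;
	my_tag_set.numa_node	= NUMA_NO_NODE;
	/* BLK_MQ_F_BLOCKING tells blk-mq that ->queue_rq() may sleep */
	my_tag_set.flags	= BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;

	return blk_mq_alloc_tag_set(&my_tag_set);
}
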
+ 2 - 13
Documentation/scsi/scsi_eh.txt

@@ -82,24 +82,13 @@ function
  1. invokes optional hostt->eh_timed_out() callback.  Return value can
     be one of
 
-    - BLK_EH_HANDLED
-	This indicates that eh_timed_out() dealt with the timeout.
-	The command is passed back to the block layer and completed
-	via __blk_complete_requests().
-
-	*NOTE* After returning BLK_EH_HANDLED the SCSI layer is
-	assumed to be finished with the command, and no other
-	functions from the SCSI layer will be called. So this
-	should typically only be returned if the eh_timed_out()
-	handler raced with normal completion.
-
     - BLK_EH_RESET_TIMER
 	This indicates that more time is required to finish the
 	command.  Timer is restarted.  This action is counted as a
 	retry and only allowed scmd->allowed + 1(!) times.  Once the
-	limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
+	limit is reached, action for BLK_EH_DONE is taken instead.
 
-    - BLK_EH_NOT_HANDLED
+    - BLK_EH_DONE
         eh_timed_out() callback did not handle the command.
 	Step #2 is taken.
 

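With BLK_EH_HANDLED gone from the documented return values, a host template's eh_timed_out() only chooses between the two remaining codes. A minimal sketch, assuming a hypothetical my_device_busy() helper (not part of the SCSI API):

#include <linux/blkdev.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>

/* Hypothetical hostt->eh_timed_out() callback: ask the block layer for
 * more time while the device is still making progress, otherwise fall
 * through to step #2 of the error-handling sequence. */
static enum blk_eh_timer_return my_eh_timed_out(struct scsi_cmnd *scmd)
{
	if (my_device_busy(scmd->device))	/* assumed helper */
		return BLK_EH_RESET_TIMER;

	return BLK_EH_DONE;
}
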
+ 1 - 1
MAINTAINERS

@@ -9700,7 +9700,7 @@ S:	Maintained
 F:	drivers/net/ethernet/netronome/
 
 NETWORK BLOCK DEVICE (NBD)
-M:	Josef Bacik <jbacik@fb.com>
+M:	Josef Bacik <josef@toxicpanda.com>
 S:	Maintained
 L:	linux-block@vger.kernel.org
 L:	nbd@other.debian.org

+ 0 - 40
arch/sparc/include/uapi/asm/jsflash.h

@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * jsflash.h: OS Flash SIMM support for JavaStations.
- *
- * Copyright (C) 1999  Pete Zaitcev
- */
-
-#ifndef _SPARC_JSFLASH_H
-#define _SPARC_JSFLASH_H
-
-#ifndef _SPARC_TYPES_H
-#include <linux/types.h>
-#endif
-
-/*
- * Semantics of the offset is a full address.
- * Hardcode it or get it from probe ioctl.
- *
- * We use full bus address, so that we would be
- * automatically compatible with possible future systems.
- */
-
-#define JSFLASH_IDENT   (('F'<<8)|54)
-struct jsflash_ident_arg {
-	__u64 off;                /* 0x20000000 is included */
-	__u32 size;
-	char name[32];		/* With trailing zero */
-};
-
-#define JSFLASH_ERASE   (('F'<<8)|55)
-/* Put 0 as argument, may be flags or sector number... */
-
-#define JSFLASH_PROGRAM (('F'<<8)|56)
-struct jsflash_program_arg {
-	__u64 data;		/* char* for sparc and sparc64 */
-	__u64 off;
-	__u32 size;
-};
-
-#endif /* _SPARC_JSFLASH_H */

+ 20 - 20
block/bfq-cgroup.c

@@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty)
 /* This should be called with the scheduler lock held. */
 static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!bfqg_stats_waiting(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
+	now = ktime_get_ns();
+	if (now > stats->start_group_wait_time)
 		blkg_stat_add(&stats->group_wait_time,
 			      now - stats->start_group_wait_time);
 	bfqg_stats_clear_waiting(stats);
@@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 		return;
 	if (bfqg == curr_bfqg)
 		return;
-	stats->start_group_wait_time = sched_clock();
+	stats->start_group_wait_time = ktime_get_ns();
 	bfqg_stats_mark_waiting(stats);
 }
 
 /* This should be called with the scheduler lock held. */
 static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!bfqg_stats_empty(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
+	now = ktime_get_ns();
+	if (now > stats->start_empty_time)
 		blkg_stat_add(&stats->empty_time,
 			      now - stats->start_empty_time);
 	bfqg_stats_clear_empty(stats);
@@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
 	if (bfqg_stats_empty(stats))
 		return;
 
-	stats->start_empty_time = sched_clock();
+	stats->start_empty_time = ktime_get_ns();
 	bfqg_stats_mark_empty(stats);
 }
 
@@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
 	struct bfqg_stats *stats = &bfqg->stats;
 
 	if (bfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
+		u64 now = ktime_get_ns();
 
-		if (time_after64(now, stats->start_idle_time))
+		if (now > stats->start_idle_time)
 			blkg_stat_add(&stats->idle_time,
 				      now - stats->start_idle_time);
 		bfqg_stats_clear_idling(stats);
@@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
 
-	stats->start_idle_time = sched_clock();
+	stats->start_idle_time = ktime_get_ns();
 	bfqg_stats_mark_idling(stats);
 }
 
@@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
 	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
 }
 
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op)
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
-	unsigned long long now = sched_clock();
+	u64 now = ktime_get_ns();
 
-	if (time_after64(now, io_start_time))
+	if (now > io_start_time_ns)
 		blkg_rwstat_add(&stats->service_time, op,
-				now - io_start_time);
-	if (time_after64(io_start_time, start_time))
+				now - io_start_time_ns);
+	if (io_start_time_ns > start_time_ns)
 		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
+				io_start_time_ns - start_time_ns);
 }
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
@@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
 			      unsigned int op) { }
 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op) { }
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }

+ 237 - 241
block/bfq-iosched.c

@@ -49,9 +49,39 @@
  *
  * In particular, to provide these low-latency guarantees, BFQ
  * explicitly privileges the I/O of two classes of time-sensitive
- * applications: interactive and soft real-time. This feature enables
- * BFQ to provide applications in these classes with a very low
- * latency. Finally, BFQ also features additional heuristics for
+ * applications: interactive and soft real-time. In more detail, BFQ
+ * behaves this way if the low_latency parameter is set (default
+ * configuration). This feature enables BFQ to provide applications in
+ * these classes with a very low latency.
+ *
+ * To implement this feature, BFQ constantly tries to detect whether
+ * the I/O requests in a bfq_queue come from an interactive or a soft
+ * real-time application. For brevity, in these cases, the queue is
+ * said to be interactive or soft real-time. In both cases, BFQ
+ * privileges the service of the queue, over that of non-interactive
+ * and non-soft-real-time queues. This privileging is performed,
+ * mainly, by raising the weight of the queue. So, for brevity, we
+ * call just weight-raising periods the time periods during which a
+ * queue is privileged, because deemed interactive or soft real-time.
+ *
+ * The detection of soft real-time queues/applications is described in
+ * detail in the comments on the function
+ * bfq_bfqq_softrt_next_start. On the other hand, the detection of an
+ * interactive queue works as follows: a queue is deemed interactive
+ * if it is constantly non empty only for a limited time interval,
+ * after which it does become empty. The queue may be deemed
+ * interactive again (for a limited time), if it restarts being
+ * constantly non empty, provided that this happens only after the
+ * queue has remained empty for a given minimum idle time.
+ *
+ * By default, BFQ computes automatically the above maximum time
+ * interval, i.e., the time interval after which a constantly
+ * non-empty queue stops being deemed interactive. Since a queue is
+ * weight-raised while it is deemed interactive, this maximum time
+ * interval happens to coincide with the (maximum) duration of the
+ * weight-raising for interactive queues.
+ *
+ * Finally, BFQ also features additional heuristics for
  * preserving both a low latency and a high throughput on NCQ-capable,
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
@@ -61,14 +91,14 @@
  * all low-latency heuristics for that device, by setting low_latency
  * to 0.
  *
- * BFQ is described in [1], where also a reference to the initial, more
- * theoretical paper on BFQ can be found. The interested reader can find
- * in the latter paper full details on the main algorithm, as well as
- * formulas of the guarantees and formal proofs of all the properties.
- * With respect to the version of BFQ presented in these papers, this
- * implementation adds a few more heuristics, such as the one that
- * guarantees a low latency to soft real-time applications, and a
- * hierarchical extension based on H-WF2Q+.
+ * BFQ is described in [1], where also a reference to the initial,
+ * more theoretical paper on BFQ can be found. The interested reader
+ * can find in the latter paper full details on the main algorithm, as
+ * well as formulas of the guarantees and formal proofs of all the
+ * properties.  With respect to the version of BFQ presented in these
+ * papers, this implementation adds a few more heuristics, such as the
+ * ones that guarantee a low latency to interactive and soft real-time
+ * applications, and a hierarchical extension based on H-WF2Q+.
  *
  * B-WF2Q+ is based on WF2Q+, which is described in [2], together with
  * H-WF2Q+, while the augmented tree used here to implement B-WF2Q+
@@ -218,56 +248,46 @@ static struct kmem_cache *bfq_pool;
 #define BFQ_RATE_SHIFT		16
 
 /*
- * By default, BFQ computes the duration of the weight raising for
- * interactive applications automatically, using the following formula:
- * duration = (R / r) * T, where r is the peak rate of the device, and
- * R and T are two reference parameters.
- * In particular, R is the peak rate of the reference device (see
- * below), and T is a reference time: given the systems that are
- * likely to be installed on the reference device according to its
- * speed class, T is about the maximum time needed, under BFQ and
- * while reading two files in parallel, to load typical large
- * applications on these systems (see the comments on
- * max_service_from_wr below, for more details on how T is obtained).
- * In practice, the slower/faster the device at hand is, the more/less
- * it takes to load applications with respect to the reference device.
- * Accordingly, the longer/shorter BFQ grants weight raising to
- * interactive applications.
- *
- * BFQ uses four different reference pairs (R, T), depending on:
- * . whether the device is rotational or non-rotational;
- * . whether the device is slow, such as old or portable HDDs, as well as
- *   SD cards, or fast, such as newer HDDs and SSDs.
+ * When configured for computing the duration of the weight-raising
+ * for interactive queues automatically (see the comments at the
+ * beginning of this file), BFQ does it using the following formula:
+ * duration = (ref_rate / r) * ref_wr_duration,
+ * where r is the peak rate of the device, and ref_rate and
+ * ref_wr_duration are two reference parameters.  In particular,
+ * ref_rate is the peak rate of the reference storage device (see
+ * below), and ref_wr_duration is about the maximum time needed, with
+ * BFQ and while reading two files in parallel, to load typical large
+ * applications on the reference device (see the comments on
+ * max_service_from_wr below, for more details on how ref_wr_duration
+ * is obtained).  In practice, the slower/faster the device at hand
+ * is, the more/less it takes to load applications with respect to the
+ * reference device.  Accordingly, the longer/shorter BFQ grants
+ * weight raising to interactive applications.
  *
- * The device's speed class is dynamically (re)detected in
- * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ * BFQ uses two different reference pairs (ref_rate, ref_wr_duration),
+ * depending on whether the device is rotational or non-rotational.
  *
- * In the following definitions, R_slow[0]/R_fast[0] and
- * T_slow[0]/T_fast[0] are the reference values for a slow/fast
- * rotational device, whereas R_slow[1]/R_fast[1] and
- * T_slow[1]/T_fast[1] are the reference values for a slow/fast
- * non-rotational device. Finally, device_speed_thresh are the
- * thresholds used to switch between speed classes. The reference
- * rates are not the actual peak rates of the devices used as a
- * reference, but slightly lower values. The reason for using these
- * slightly lower values is that the peak-rate estimator tends to
- * yield slightly lower values than the actual peak rate (it can yield
- * the actual peak rate only if there is only one process doing I/O,
- * and the process does sequential I/O).
+ * In the following definitions, ref_rate[0] and ref_wr_duration[0]
+ * are the reference values for a rotational device, whereas
+ * ref_rate[1] and ref_wr_duration[1] are the reference values for a
+ * non-rotational device. The reference rates are not the actual peak
+ * rates of the devices used as a reference, but slightly lower
+ * values. The reason for using slightly lower values is that the
+ * peak-rate estimator tends to yield slightly lower values than the
+ * actual peak rate (it can yield the actual peak rate only if there
+ * is only one process doing I/O, and the process does sequential
+ * I/O).
  *
- * Both the reference peak rates and the thresholds are measured in
- * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ * The reference peak rates are measured in sectors/usec, left-shifted
+ * by BFQ_RATE_SHIFT.
  */
-static int R_slow[2] = {1000, 10700};
-static int R_fast[2] = {14000, 33000};
+static int ref_rate[2] = {14000, 33000};
 /*
- * To improve readability, a conversion function is used to initialize the
- * following arrays, which entails that they can be initialized only in a
- * function.
+ * To improve readability, a conversion function is used to initialize
+ * the following array, which entails that the array can be
+ * initialized only in a function.
  */
-static int T_slow[2];
-static int T_fast[2];
-static int device_speed_thresh[2];
+static int ref_wr_duration[2];
 
 /*
  * BFQ uses the above-detailed, time-based weight-raising mechanism to
@@ -486,46 +506,6 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
 	}
 }
 
-/*
- * See the comments on bfq_limit_depth for the purpose of
- * the depths set in the function.
- */
-static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
-{
-	bfqd->sb_shift = bt->sb.shift;
-
-	/*
-	 * In-word depths if no bfq_queue is being weight-raised:
-	 * leaving 25% of tags only for sync reads.
-	 *
-	 * In next formulas, right-shift the value
-	 * (1U<<bfqd->sb_shift), instead of computing directly
-	 * (1U<<(bfqd->sb_shift - something)), to be robust against
-	 * any possible value of bfqd->sb_shift, without having to
-	 * limit 'something'.
-	 */
-	/* no more than 50% of tags for async I/O */
-	bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
-	/*
-	 * no more than 75% of tags for sync writes (25% extra tags
-	 * w.r.t. async I/O, to prevent async I/O from starving sync
-	 * writes)
-	 */
-	bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
-
-	/*
-	 * In-word depths in case some bfq_queue is being weight-
-	 * raised: leaving ~63% of tags for sync reads. This is the
-	 * highest percentage for which, in our tests, application
-	 * start-up times didn't suffer from any regression due to tag
-	 * shortage.
-	 */
-	/* no more than ~18% of tags for async I/O */
-	bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
-	/* no more than ~37% of tags for sync writes (~20% extra tags) */
-	bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
-}
-
 /*
  * Async I/O can easily starve sync I/O (both sync reads and sync
  * writes), by consuming all tags. Similarly, storms of sync writes,
@@ -535,25 +515,11 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
  */
 static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct bfq_data *bfqd = data->q->elevator->elevator_data;
-	struct sbitmap_queue *bt;
 
 	if (op_is_sync(op) && !op_is_write(op))
 		return;
 
-	if (data->flags & BLK_MQ_REQ_RESERVED) {
-		if (unlikely(!tags->nr_reserved_tags)) {
-			WARN_ON_ONCE(1);
-			return;
-		}
-		bt = &tags->breserved_tags;
-	} else
-		bt = &tags->bitmap_tags;
-
-	if (unlikely(bfqd->sb_shift != bt->sb.shift))
-		bfq_update_depths(bfqd, bt);
-
 	data->shallow_depth =
 		bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
 
@@ -906,26 +872,30 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
 	if (bfqd->bfq_wr_max_time > 0)
 		return bfqd->bfq_wr_max_time;
 
-	dur = bfqd->RT_prod;
+	dur = bfqd->rate_dur_prod;
 	do_div(dur, bfqd->peak_rate);
 
 	/*
-	 * Limit duration between 3 and 13 seconds. Tests show that
-	 * higher values than 13 seconds often yield the opposite of
-	 * the desired result, i.e., worsen responsiveness by letting
-	 * non-interactive and non-soft-real-time applications
-	 * preserve weight raising for a too long time interval.
+	 * Limit duration between 3 and 25 seconds. The upper limit
+	 * has been conservatively set after the following worst case:
+	 * on a QEMU/KVM virtual machine
+	 * - running in a slow PC
+	 * - with a virtual disk stacked on a slow low-end 5400rpm HDD
+	 * - serving a heavy I/O workload, such as the sequential reading
+	 *   of several files
+	 * mplayer took 23 seconds to start, if constantly weight-raised.
+	 *
+	 * As for higher values than that accomodating the above bad
+	 * scenario, tests show that higher values would often yield
+	 * the opposite of the desired result, i.e., would worsen
+	 * responsiveness by allowing non-interactive applications to
+	 * preserve weight raising for too long.
 	 *
 	 * On the other end, lower values than 3 seconds make it
 	 * difficult for most interactive tasks to complete their jobs
 	 * before weight-raising finishes.
 	 */
-	if (dur > msecs_to_jiffies(13000))
-		dur = msecs_to_jiffies(13000);
-	else if (dur < msecs_to_jiffies(3000))
-		dur = msecs_to_jiffies(3000);
-
-	return dur;
+	return clamp_val(dur, msecs_to_jiffies(3000), msecs_to_jiffies(25000));
 }
 
 /* switch back from soft real-time to interactive weight raising */
@@ -1392,15 +1362,6 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
 	return wr_or_deserves_wr;
 }
 
-/*
- * Return the farthest future time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_greatest_from_now(void)
-{
-	return jiffies + MAX_JIFFY_OFFSET;
-}
-
 /*
  * Return the farthest past time instant according to jiffies
  * macros.
@@ -1545,7 +1506,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	in_burst = bfq_bfqq_in_large_burst(bfqq);
 	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
 		!in_burst &&
-		time_is_before_jiffies(bfqq->soft_rt_next_start);
+		time_is_before_jiffies(bfqq->soft_rt_next_start) &&
+		bfqq->dispatched == 0;
 	*interactive = !in_burst && idle_for_long_time;
 	wr_or_deserves_wr = bfqd->low_latency &&
 		(bfqq->wr_coeff > 1 ||
@@ -1858,6 +1820,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
 	return ELEVATOR_NO_MERGE;
 }
 
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
 static void bfq_request_merged(struct request_queue *q, struct request *req,
 			       enum elv_merge type)
 {
@@ -1866,7 +1830,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
 	    blk_rq_pos(req) <
 	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
 				    struct request, rb_node))) {
-		struct bfq_queue *bfqq = RQ_BFQQ(req);
+		struct bfq_queue *bfqq = bfq_init_rq(req);
 		struct bfq_data *bfqd = bfqq->bfqd;
 		struct request *prev, *next_rq;
 
@@ -1891,14 +1855,25 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
 	}
 }
 
+/*
+ * This function is called to notify the scheduler that the requests
+ * rq and 'next' have been merged, with 'next' going away.  BFQ
+ * exploits this hook to address the following issue: if 'next' has a
+ * fifo_time lower that rq, then the fifo_time of rq must be set to
+ * the value of 'next', to not forget the greater age of 'next'.
+ *
+ * NOTE: in this function we assume that rq is in a bfq_queue, basing
+ * on that rq is picked from the hash table q->elevator->hash, which,
+ * in its turn, is filled only with I/O requests present in
+ * bfq_queues, while BFQ is in use for the request queue q. In fact,
+ * the function that fills this hash table (elv_rqhash_add) is called
+ * only by bfq_insert_request.
+ */
 static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 				struct request *next)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
-
-	if (!RB_EMPTY_NODE(&rq->rb_node))
-		goto end;
-	spin_lock_irq(&bfqq->bfqd->lock);
+	struct bfq_queue *bfqq = bfq_init_rq(rq),
+		*next_bfqq = bfq_init_rq(next);
 
 	/*
 	 * If next and rq belong to the same bfq_queue and next is older
@@ -1920,11 +1895,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 	if (bfqq->next_rq == next)
 		bfqq->next_rq = rq;
 
-	bfq_remove_request(q, next);
-	bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
-
-	spin_unlock_irq(&bfqq->bfqd->lock);
-end:
 	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
 }
 
@@ -2506,37 +2476,15 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
 /*
  * Update parameters related to throughput and responsiveness, as a
  * function of the estimated peak rate. See comments on
- * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ * bfq_calc_max_budget(), and on the ref_wr_duration array.
  */
 static void update_thr_responsiveness_params(struct bfq_data *bfqd)
 {
-	int dev_type = blk_queue_nonrot(bfqd->queue);
-
-	if (bfqd->bfq_user_max_budget == 0)
+	if (bfqd->bfq_user_max_budget == 0) {
 		bfqd->bfq_max_budget =
 			bfq_calc_max_budget(bfqd);
-
-	if (bfqd->device_speed == BFQ_BFQD_FAST &&
-	    bfqd->peak_rate < device_speed_thresh[dev_type]) {
-		bfqd->device_speed = BFQ_BFQD_SLOW;
-		bfqd->RT_prod = R_slow[dev_type] *
-			T_slow[dev_type];
-	} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
-		   bfqd->peak_rate > device_speed_thresh[dev_type]) {
-		bfqd->device_speed = BFQ_BFQD_FAST;
-		bfqd->RT_prod = R_fast[dev_type] *
-			T_fast[dev_type];
+		bfq_log(bfqd, "new max_budget = %d", bfqd->bfq_max_budget);
 	}
-
-	bfq_log(bfqd,
-"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
-		dev_type == 0 ? "ROT" : "NONROT",
-		bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
-		bfqd->device_speed == BFQ_BFQD_FAST ?
-		(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
-		(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
-		(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
-		BFQ_RATE_SHIFT);
 }
 
 static void bfq_reset_rate_computation(struct bfq_data *bfqd,
@@ -3265,23 +3213,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 			bfqq->soft_rt_next_start =
 				bfq_bfqq_softrt_next_start(bfqd, bfqq);
 		else {
-			/*
-			 * The application is still waiting for the
-			 * completion of one or more requests:
-			 * prevent it from possibly being incorrectly
-			 * deemed as soft real-time by setting its
-			 * soft_rt_next_start to infinity. In fact,
-			 * without this assignment, the application
-			 * would be incorrectly deemed as soft
-			 * real-time if:
-			 * 1) it issued a new request before the
-			 *    completion of all its in-flight
-			 *    requests, and
-			 * 2) at that time, its soft_rt_next_start
-			 *    happened to be in the past.
-			 */
-			bfqq->soft_rt_next_start =
-				bfq_greatest_from_now();
 			/*
 			 * Schedule an update of soft_rt_next_start to when
 			 * the task may be discovered to be isochronous.
@@ -4540,14 +4471,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 					   unsigned int cmd_flags) {}
 #endif
 
-static void bfq_prepare_request(struct request *rq, struct bio *bio);
-
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
 
@@ -4562,24 +4491,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	blk_mq_sched_request_inserted(rq);
 
 	spin_lock_irq(&bfqd->lock);
+	bfqq = bfq_init_rq(rq);
 	if (at_head || blk_rq_is_passthrough(rq)) {
 		if (at_head)
 			list_add(&rq->queuelist, &bfqd->dispatch);
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
-	} else {
-		if (WARN_ON_ONCE(!bfqq)) {
-			/*
-			 * This should never happen. Most likely rq is
-			 * a requeued regular request, being
-			 * re-inserted without being first
-			 * re-prepared. Do a prepare, to avoid
-			 * failure.
-			 */
-			bfq_prepare_request(rq, rq->bio);
-			bfqq = RQ_BFQQ(rq);
-		}
-
+	} else { /* bfqq is assumed to be non null here */
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4778,8 +4696,8 @@ static void bfq_finish_requeue_request(struct request *rq)
 
 	if (rq->rq_flags & RQF_STARTED)
 		bfqg_stats_update_completion(bfqq_group(bfqq),
-					     rq_start_time_ns(rq),
-					     rq_io_start_time_ns(rq),
+					     rq->start_time_ns,
+					     rq->io_start_time_ns,
 					     rq->cmd_flags);
 
 	if (likely(rq->rq_flags & RQF_STARTED)) {
@@ -4922,11 +4840,48 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 }
 
 /*
- * Allocate bfq data structures associated with this request.
+ * Only reset private fields. The actual request preparation will be
+ * performed by bfq_init_rq, when rq is either inserted or merged. See
+ * comments on bfq_init_rq for the reason behind this delayed
+ * preparation.
  */
 static void bfq_prepare_request(struct request *rq, struct bio *bio)
+{
+	/*
+	 * Regardless of whether we have an icq attached, we have to
+	 * clear the scheduler pointers, as they might point to
+	 * previously allocated bic/bfqq structs.
+	 */
+	rq->elv.priv[0] = rq->elv.priv[1] = NULL;
+}
+
+/*
+ * If needed, init rq, allocate bfq data structures associated with
+ * rq, and increment reference counters in the destination bfq_queue
+ * for rq. Return the destination bfq_queue for rq, or NULL is rq is
+ * not associated with any bfq_queue.
+ *
+ * This function is invoked by the functions that perform rq insertion
+ * or merging. One may have expected the above preparation operations
+ * to be performed in bfq_prepare_request, and not delayed to when rq
+ * is inserted or merged. The rationale behind this delayed
+ * preparation is that, after the prepare_request hook is invoked for
+ * rq, rq may still be transformed into a request with no icq, i.e., a
+ * request not associated with any queue. No bfq hook is invoked to
+ * signal this tranformation. As a consequence, should these
+ * preparation operations be performed when the prepare_request hook
+ * is invoked, and should rq be transformed one moment later, bfq
+ * would end up in an inconsistent state, because it would have
+ * incremented some queue counters for an rq destined to
+ * transformation, without any chance to correctly lower these
+ * counters back. In contrast, no transformation can still happen for
+ * rq after rq has been inserted or merged. So, it is safe to execute
+ * these preparation operations when rq is finally inserted or merged.
+ */
+static struct bfq_queue *bfq_init_rq(struct request *rq)
 {
 	struct request_queue *q = rq->q;
+	struct bio *bio = rq->bio;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_io_cq *bic;
 	const int is_sync = rq_is_sync(rq);
@@ -4934,20 +4889,21 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
 	bool new_queue = false;
 	bool bfqq_already_existing = false, split = false;
 
+	if (unlikely(!rq->elv.icq))
+		return NULL;
+
 	/*
-	 * Even if we don't have an icq attached, we should still clear
-	 * the scheduler pointers, as they might point to previously
-	 * allocated bic/bfqq structs.
+	 * Assuming that elv.priv[1] is set only if everything is set
+	 * for this rq. This holds true, because this function is
+	 * invoked only for insertion or merging, and, after such
+	 * events, a request cannot be manipulated any longer before
+	 * being removed from bfq.
 	 */
-	if (!rq->elv.icq) {
-		rq->elv.priv[0] = rq->elv.priv[1] = NULL;
-		return;
-	}
+	if (rq->elv.priv[1])
+		return rq->elv.priv[1];
 
 	bic = icq_to_bic(rq->elv.icq);
 
-	spin_lock_irq(&bfqd->lock);
-
 	bfq_check_ioprio_change(bic, bio);
 
 	bfq_bic_update_cgroup(bic, bio);
@@ -5006,7 +4962,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
 	if (unlikely(bfq_bfqq_just_created(bfqq)))
 		bfq_handle_burst(bfqd, bfqq);
 
-	spin_unlock_irq(&bfqd->lock);
+	return bfqq;
 }
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -5105,6 +5061,64 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
 	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
 }
 
+/*
+ * See the comments on bfq_limit_depth for the purpose of
+ * the depths set in the function. Return minimum shallow depth we'll use.
+ */
+static unsigned int bfq_update_depths(struct bfq_data *bfqd,
+				      struct sbitmap_queue *bt)
+{
+	unsigned int i, j, min_shallow = UINT_MAX;
+
+	/*
+	 * In-word depths if no bfq_queue is being weight-raised:
+	 * leaving 25% of tags only for sync reads.
+	 *
+	 * In next formulas, right-shift the value
+	 * (1U<<bt->sb.shift), instead of computing directly
+	 * (1U<<(bt->sb.shift - something)), to be robust against
+	 * any possible value of bt->sb.shift, without having to
+	 * limit 'something'.
+	 */
+	/* no more than 50% of tags for async I/O */
+	bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
+	/*
+	 * no more than 75% of tags for sync writes (25% extra tags
+	 * w.r.t. async I/O, to prevent async I/O from starving sync
+	 * writes)
+	 */
+	bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
+
+	/*
+	 * In-word depths in case some bfq_queue is being weight-
+	 * raised: leaving ~63% of tags for sync reads. This is the
+	 * highest percentage for which, in our tests, application
+	 * start-up times didn't suffer from any regression due to tag
+	 * shortage.
+	 */
+	/* no more than ~18% of tags for async I/O */
+	bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
+	/* no more than ~37% of tags for sync writes (~20% extra tags) */
+	bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < 2; j++)
+			min_shallow = min(min_shallow, bfqd->word_depths[i][j]);
+
+	return min_shallow;
+}
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+	struct blk_mq_tags *tags = hctx->sched_tags;
+	unsigned int min_shallow;
+
+	min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
+	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+	return 0;
+}
+
 static void bfq_exit_queue(struct elevator_queue *e)
 {
 	struct bfq_data *bfqd = e->elevator_data;
@@ -5242,14 +5256,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->wr_busy_queues = 0;
 
 	/*
-	 * Begin by assuming, optimistically, that the device is a
-	 * high-speed one, and that its peak rate is equal to 2/3 of
-	 * the highest reference rate.
+	 * Begin by assuming, optimistically, that the device peak
+	 * rate is equal to 2/3 of the highest reference rate.
 	 */
-	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
-			T_fast[blk_queue_nonrot(bfqd->queue)];
-	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
-	bfqd->device_speed = BFQ_BFQD_FAST;
+	bfqd->rate_dur_prod = ref_rate[blk_queue_nonrot(bfqd->queue)] *
+		ref_wr_duration[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
 
 	spin_lock_init(&bfqd->lock);
 
@@ -5526,6 +5538,7 @@ static struct elevator_type iosched_bfq_mq = {
 		.requests_merged	= bfq_requests_merged,
 		.request_merged		= bfq_request_merged,
 		.has_work		= bfq_has_work,
+		.init_hctx		= bfq_init_hctx,
 		.init_sched		= bfq_init_queue,
 		.exit_sched		= bfq_exit_queue,
 	},
@@ -5556,8 +5569,8 @@ static int __init bfq_init(void)
 	/*
 	 * Times to load large popular applications for the typical
 	 * systems installed on the reference devices (see the
-	 * comments before the definitions of the next two
-	 * arrays). Actually, we use slightly slower values, as the
+	 * comments before the definition of the next
+	 * array). Actually, we use slightly lower values, as the
 	 * estimated peak rate tends to be smaller than the actual
 	 * peak rate.  The reason for this last fact is that estimates
 	 * are computed over much shorter time intervals than the long
@@ -5566,25 +5579,8 @@ static int __init bfq_init(void)
 	 * scheduler cannot rely on a peak-rate-evaluation workload to
 	 * be run for a long time.
 	 */
-	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
-	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
-	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
-	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
-
-	/*
-	 * Thresholds that determine the switch between speed classes
-	 * (see the comments before the definition of the array
-	 * device_speed_thresh). These thresholds are biased towards
-	 * transitions to the fast class. This is safer than the
-	 * opposite bias. In fact, a wrong transition to the slow
-	 * class results in short weight-raising periods, because the
-	 * speed of the device then tends to be higher that the
-	 * reference peak rate. On the opposite end, a wrong
-	 * transition to the fast class tends to increase
-	 * weight-raising periods, because of the opposite reason.
-	 */
-	device_speed_thresh[0] = (4 * R_slow[0]) / 3;
-	device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+	ref_wr_duration[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+	ref_wr_duration[1] = msecs_to_jiffies(2500); /* actually 3 sec */
 
 	ret = elv_register(&iosched_bfq_mq);
 	if (ret)

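To make the reworked weight-raising duration formula above concrete (illustrative numbers, not from the patch): with ref_rate[1] = 33000 and ref_wr_duration[1] of about 2.5 seconds for non-rotational devices, a drive whose estimated peak rate comes out at half the reference rate gets dur = (33000 / 16500) * 2.5 s = 5 s of interactive weight-raising, while a device measured at a twentieth of the reference rate would compute 50 s and be clamped to the new 25 s upper limit by clamp_val().
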
+ 9 - 21
block/bfq-iosched.h

@@ -399,11 +399,6 @@ struct bfq_io_cq {
 	struct bfq_ttime saved_ttime;
 };
 
-enum bfq_device_speed {
-	BFQ_BFQD_FAST,
-	BFQ_BFQD_SLOW,
-};
-
 /**
  * struct bfq_data - per-device data structure.
  *
@@ -611,12 +606,11 @@ struct bfq_data {
 	/* Max service-rate for a soft real-time queue, in sectors/sec */
 	unsigned int bfq_wr_max_softrt_rate;
 	/*
-	 * Cached value of the product R*T, used for computing the
-	 * maximum duration of weight raising automatically.
+	 * Cached value of the product ref_rate*ref_wr_duration, used
+	 * for computing the maximum duration of weight raising
+	 * automatically.
 	 */
-	u64 RT_prod;
-	/* device-speed class for the low-latency heuristic */
-	enum bfq_device_speed device_speed;
+	u64 rate_dur_prod;
 
 	/* fallback dummy bfqq for extreme OOM conditions */
 	struct bfq_queue oom_bfqq;
@@ -635,12 +629,6 @@ struct bfq_data {
 	/* bfqq associated with the task issuing current bio for merging */
 	struct bfq_queue *bio_bfqq;
 
-	/*
-	 * Cached sbitmap shift, used to compute depth limits in
-	 * bfq_update_depths.
-	 */
-	unsigned int sb_shift;
-
 	/*
 	 * Depth limits used in bfq_limit_depth (see comments on the
 	 * function)
@@ -732,9 +720,9 @@ struct bfqg_stats {
 	/* total time with empty current active q with other requests queued */
 	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
-	uint64_t			start_group_wait_time;
-	uint64_t			start_idle_time;
-	uint64_t			start_empty_time;
+	u64				start_group_wait_time;
+	u64				start_idle_time;
+	u64				start_empty_time;
 	uint16_t			flags;
 #endif	/* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
 };
@@ -856,8 +844,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
 			      unsigned int op);
 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op);
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op);
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg);

+ 14 - 15
block/bio-integrity.c

@@ -56,12 +56,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 	struct bio_set *bs = bio->bi_pool;
 	unsigned inline_vecs;
 
-	if (!bs || !bs->bio_integrity_pool) {
+	if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
 		bip = kmalloc(sizeof(struct bio_integrity_payload) +
 			      sizeof(struct bio_vec) * nr_vecs, gfp_mask);
 		inline_vecs = nr_vecs;
 	} else {
-		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+		bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
 		inline_vecs = BIP_INLINE_VECS;
 	}
 
@@ -74,7 +74,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 		unsigned long idx = 0;
 
 		bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
-					  bs->bvec_integrity_pool);
+					  &bs->bvec_integrity_pool);
 		if (!bip->bip_vec)
 			goto err;
 		bip->bip_max_vcnt = bvec_nr_vecs(idx);
@@ -90,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 
 	return bip;
 err:
-	mempool_free(bip, bs->bio_integrity_pool);
+	mempool_free(bip, &bs->bio_integrity_pool);
 	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
@@ -111,10 +111,10 @@ static void bio_integrity_free(struct bio *bio)
 		kfree(page_address(bip->bip_vec->bv_page) +
 		      bip->bip_vec->bv_offset);
 
-	if (bs && bs->bio_integrity_pool) {
-		bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
+	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
+		bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
 
-		mempool_free(bip, bs->bio_integrity_pool);
+		mempool_free(bip, &bs->bio_integrity_pool);
 	} else {
 		kfree(bip);
 	}
@@ -465,16 +465,15 @@ EXPORT_SYMBOL(bio_integrity_clone);
 
 int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
-	if (bs->bio_integrity_pool)
+	if (mempool_initialized(&bs->bio_integrity_pool))
 		return 0;
 
-	bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
-	if (!bs->bio_integrity_pool)
+	if (mempool_init_slab_pool(&bs->bio_integrity_pool,
+				   pool_size, bip_slab))
 		return -1;
 
-	bs->bvec_integrity_pool = biovec_create_pool(pool_size);
-	if (!bs->bvec_integrity_pool) {
-		mempool_destroy(bs->bio_integrity_pool);
+	if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
+		mempool_exit(&bs->bio_integrity_pool);
 		return -1;
 	}
 
@@ -484,8 +483,8 @@ EXPORT_SYMBOL(bioset_integrity_create);
 
 void bioset_integrity_free(struct bio_set *bs)
 {
-	mempool_destroy(bs->bio_integrity_pool);
-	mempool_destroy(bs->bvec_integrity_pool);
+	mempool_exit(&bs->bio_integrity_pool);
+	mempool_exit(&bs->bvec_integrity_pool);
 }
 EXPORT_SYMBOL(bioset_integrity_free);
 

+ 109 - 80
block/bio.c

@@ -53,7 +53,7 @@ static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
  * fs_bio_set is the bio_set containing bio and iovec memory pools used by
  * IO code that does not need private memory pools.
  */
-struct bio_set *fs_bio_set;
+struct bio_set fs_bio_set;
 EXPORT_SYMBOL(fs_bio_set);
 
 /*
@@ -254,7 +254,7 @@ static void bio_free(struct bio *bio)
 	bio_uninit(bio);
 
 	if (bs) {
-		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
+		bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
 
 		/*
 		 * If we have front padding, adjust the bio pointer before freeing
@@ -262,7 +262,7 @@ static void bio_free(struct bio *bio)
 		p = bio;
 		p -= bs->front_pad;
 
-		mempool_free(p, bs->bio_pool);
+		mempool_free(p, &bs->bio_pool);
 	} else {
 		/* Bio was allocated by bio_kmalloc() */
 		kfree(bio);
@@ -454,7 +454,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 		inline_vecs = nr_iovecs;
 	} else {
 		/* should not use nobvec bioset for nr_iovecs > 0 */
-		if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
+		if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
+				 nr_iovecs > 0))
 			return NULL;
 		/*
 		 * generic_make_request() converts recursion to iteration; this
@@ -483,11 +484,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
-		p = mempool_alloc(bs->bio_pool, gfp_mask);
+		p = mempool_alloc(&bs->bio_pool, gfp_mask);
 		if (!p && gfp_mask != saved_gfp) {
 			punt_bios_to_rescuer(bs);
 			gfp_mask = saved_gfp;
-			p = mempool_alloc(bs->bio_pool, gfp_mask);
+			p = mempool_alloc(&bs->bio_pool, gfp_mask);
 		}
 
 		front_pad = bs->front_pad;
@@ -503,11 +504,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 	if (nr_iovecs > inline_vecs) {
 		unsigned long idx = 0;
 
-		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
 		if (!bvl && gfp_mask != saved_gfp) {
 			punt_bios_to_rescuer(bs);
 			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
 		}
 
 		if (unlikely(!bvl))
@@ -524,25 +525,25 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 	return bio;
 
 err_free:
-	mempool_free(p, bs->bio_pool);
+	mempool_free(p, &bs->bio_pool);
 	return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 {
 	unsigned long flags;
 	struct bio_vec bv;
 	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, iter) {
+	__bio_for_each_segment(bv, bio, iter, start) {
 		char *data = bvec_kmap_irq(&bv, &flags);
 		memset(data, 0, bv.bv_len);
 		flush_dcache_page(bv.bv_page);
 		bvec_kunmap_irq(data, &flags);
 	}
 }
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
 
 /**
  * bio_put - release a reference to a bio
@@ -970,27 +971,68 @@ void bio_advance(struct bio *bio, unsigned bytes)
 }
 EXPORT_SYMBOL(bio_advance);
 
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			struct bio *src, struct bvec_iter *src_iter)
+{
+	struct bio_vec src_bv, dst_bv;
+	void *src_p, *dst_p;
+	unsigned bytes;
+
+	while (src_iter->bi_size && dst_iter->bi_size) {
+		src_bv = bio_iter_iovec(src, *src_iter);
+		dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+		bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+		src_p = kmap_atomic(src_bv.bv_page);
+		dst_p = kmap_atomic(dst_bv.bv_page);
+
+		memcpy(dst_p + dst_bv.bv_offset,
+		       src_p + src_bv.bv_offset,
+		       bytes);
+
+		kunmap_atomic(dst_p);
+		kunmap_atomic(src_p);
+
+		flush_dcache_page(dst_bv.bv_page);
+
+		bio_advance_iter(src, src_iter, bytes);
+		bio_advance_iter(dst, dst_iter, bytes);
+	}
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
+
 /**
- * bio_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
- * @src and @dst as linked lists of bios.
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
  *
  * Stops when it reaches the end of either @src or @dst - that is, copies
  * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
  */
 void bio_copy_data(struct bio *dst, struct bio *src)
 {
-	struct bvec_iter src_iter, dst_iter;
-	struct bio_vec src_bv, dst_bv;
-	void *src_p, *dst_p;
-	unsigned bytes;
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
 
-	src_iter = src->bi_iter;
-	dst_iter = dst->bi_iter;
+	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+EXPORT_SYMBOL(bio_copy_data);
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
 
 	while (1) {
 		if (!src_iter.bi_size) {
@@ -1009,26 +1051,10 @@ void bio_copy_data(struct bio *dst, struct bio *src)
 			dst_iter = dst->bi_iter;
 		}
 
-		src_bv = bio_iter_iovec(src, src_iter);
-		dst_bv = bio_iter_iovec(dst, dst_iter);
-
-		bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
-		src_p = kmap_atomic(src_bv.bv_page);
-		dst_p = kmap_atomic(dst_bv.bv_page);
-
-		memcpy(dst_p + dst_bv.bv_offset,
-		       src_p + src_bv.bv_offset,
-		       bytes);
-
-		kunmap_atomic(dst_p);
-		kunmap_atomic(src_p);
-
-		bio_advance_iter(src, &src_iter, bytes);
-		bio_advance_iter(dst, &dst_iter, bytes);
+		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
 	}
 }
-EXPORT_SYMBOL(bio_copy_data);
+EXPORT_SYMBOL(bio_list_copy_data);
 
 struct bio_map_data {
 	int is_our_pages;
@@ -1584,6 +1610,7 @@ void bio_set_pages_dirty(struct bio *bio)
 			set_page_dirty_lock(page);
 	}
 }
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
 
 static void bio_release_pages(struct bio *bio)
 {
@@ -1667,6 +1694,7 @@ void bio_check_pages_dirty(struct bio *bio)
 		bio_put(bio);
 	}
 }
+EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
 
 void generic_start_io_acct(struct request_queue *q, int rw,
 			   unsigned long sectors, struct hd_struct *part)
@@ -1749,6 +1777,9 @@ again:
 	if (!bio_integrity_endio(bio))
 		return;
 
+	if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
+		bio->bi_next = NULL;
+
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
 	 * various corner cases will break (like stacking block devices that
@@ -1848,30 +1879,38 @@ EXPORT_SYMBOL_GPL(bio_trim);
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-mempool_t *biovec_create_pool(int pool_entries)
+int biovec_init_pool(mempool_t *pool, int pool_entries)
 {
 	struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
 
-	return mempool_create_slab_pool(pool_entries, bp->slab);
+	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
 }
 
-void bioset_free(struct bio_set *bs)
+/*
+ * bioset_exit - exit a bioset initialized with bioset_init()
+ *
+ * May be called on a zeroed but uninitialized bioset (i.e. allocated with
+ * kzalloc()).
+ */
+void bioset_exit(struct bio_set *bs)
 {
 	if (bs->rescue_workqueue)
 		destroy_workqueue(bs->rescue_workqueue);
+	bs->rescue_workqueue = NULL;
 
-	mempool_destroy(bs->bio_pool);
-	mempool_destroy(bs->bvec_pool);
+	mempool_exit(&bs->bio_pool);
+	mempool_exit(&bs->bvec_pool);
 
 	bioset_integrity_free(bs);
-	bio_put_slab(bs);
-
-	kfree(bs);
+	if (bs->bio_slab)
+		bio_put_slab(bs);
+	bs->bio_slab = NULL;
 }
-EXPORT_SYMBOL(bioset_free);
+EXPORT_SYMBOL(bioset_exit);
 
 /**
- * bioset_create  - Create a bio_set
+ * bioset_init - Initialize a bio_set
+ * @bs:		pool to initialize
  * @pool_size:	Number of bio and bio_vecs to cache in the mempool
  * @front_pad:	Number of bytes to allocate in front of the returned bio
  * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
@@ -1890,16 +1929,12 @@ EXPORT_SYMBOL(bioset_free);
  *    dispatch queued requests when the mempool runs out of space.
  *
  */
-struct bio_set *bioset_create(unsigned int pool_size,
-			      unsigned int front_pad,
-			      int flags)
+int bioset_init(struct bio_set *bs,
+		unsigned int pool_size,
+		unsigned int front_pad,
+		int flags)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
-	struct bio_set *bs;
-
-	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
-	if (!bs)
-		return NULL;
 
 	bs->front_pad = front_pad;
 
@@ -1908,34 +1943,29 @@ struct bio_set *bioset_create(unsigned int pool_size,
 	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
 
 	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
-	if (!bs->bio_slab) {
-		kfree(bs);
-		return NULL;
-	}
+	if (!bs->bio_slab)
+		return -ENOMEM;
 
-	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
-	if (!bs->bio_pool)
+	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
 		goto bad;
 
-	if (flags & BIOSET_NEED_BVECS) {
-		bs->bvec_pool = biovec_create_pool(pool_size);
-		if (!bs->bvec_pool)
-			goto bad;
-	}
+	if ((flags & BIOSET_NEED_BVECS) &&
+	    biovec_init_pool(&bs->bvec_pool, pool_size))
+		goto bad;
 
 	if (!(flags & BIOSET_NEED_RESCUER))
-		return bs;
+		return 0;
 
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
 
-	return bs;
+	return 0;
 bad:
-	bioset_free(bs);
-	return NULL;
+	bioset_exit(bs);
+	return -ENOMEM;
 }
-EXPORT_SYMBOL(bioset_create);
+EXPORT_SYMBOL(bioset_init);
 
 #ifdef CONFIG_BLK_CGROUP
 
@@ -2020,11 +2050,10 @@ static int __init init_bio(void)
 	bio_integrity_init();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!fs_bio_set)
+	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
 		panic("bio: can't allocate bios\n");
 
-	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
 		panic("bio: can't create integrity pool\n");
 
 	return 0;
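
The hunks above convert bio_set management from pointer-returning constructors (bioset_create/bioset_free) to caller-embedded structures (bioset_init/bioset_exit), with fs_bio_set itself becoming a plain struct. A minimal sketch of a consumer of the new interface, assuming a hypothetical driver context; only bioset_init/bioset_exit and the flag are from this patch:

#include <linux/bio.h>

/* Hypothetical driver context embedding its bio_set instead of holding a pointer. */
struct my_dev {
	struct bio_set bs;
};

static int my_dev_init(struct my_dev *dev)
{
	/* bioset_init() now returns 0 or -ENOMEM rather than a pointer. */
	return bioset_init(&dev->bs, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
}

static void my_dev_exit(struct my_dev *dev)
{
	/* Per the comment above, safe even on a zeroed, never-initialized bio_set. */
	bioset_exit(&dev->bs);
}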

+ 52 - 68
block/blk-core.c

@@ -196,15 +196,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->tag = -1;
 	rq->internal_tag = -1;
-	rq->start_time = jiffies;
-	set_start_time_ns(rq);
+	rq->start_time_ns = ktime_get_ns();
 	rq->part = NULL;
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * See comment of blk_mq_init_request
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -280,6 +273,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
+	/*
+	 * XXX this code looks suspicious - it's not consistent with advancing
+	 * req->bio in caller
+	 */
 	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
 		bio_endio(bio);
 }
@@ -360,7 +357,6 @@ EXPORT_SYMBOL(blk_start_queue_async);
 void blk_start_queue(struct request_queue *q)
 {
 	lockdep_assert_held(q->queue_lock);
-	WARN_ON(!in_interrupt() && !irqs_disabled());
 	WARN_ON_ONCE(q->mq_ops);
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -996,18 +992,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 					   spinlock_t *lock)
 {
 	struct request_queue *q;
+	int ret;
 
 	q = kmem_cache_alloc_node(blk_requestq_cachep,
 				gfp_mask | __GFP_ZERO, node_id);
 	if (!q)
 		return NULL;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!q->bio_split)
+	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (ret)
 		goto fail_id;
 
 	q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
@@ -1079,7 +1081,7 @@ fail_bdi:
 fail_stats:
 	bdi_put(q->backing_dev_info);
 fail_split:
-	bioset_free(q->bio_split);
+	bioset_exit(&q->bio_split);
 fail_id:
 	ida_simple_remove(&blk_queue_ida, q->id);
 fail_q:
@@ -1173,16 +1175,8 @@ int blk_init_allocated_queue(struct request_queue *q)
 
 	q->sg_reserved_size = INT_MAX;
 
-	/* Protect q->elevator from elevator_change */
-	mutex_lock(&q->sysfs_lock);
-
-	/* init elevator */
-	if (elevator_init(q, NULL)) {
-		mutex_unlock(&q->sysfs_lock);
+	if (elevator_init(q))
 		goto out_exit_flush_rq;
-	}
-
-	mutex_unlock(&q->sysfs_lock);
 	return 0;
 
 out_exit_flush_rq:
@@ -1334,6 +1328,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @flags: BLQ_MQ_REQ_* flags
+ * @gfp_mask: allocator flags
  *
  * Get a free request from @q.  This function may fail under memory
  * pressure or if @q is dead.
@@ -1343,7 +1338,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *__get_request(struct request_list *rl, unsigned int op,
-				     struct bio *bio, blk_mq_req_flags_t flags)
+		struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
 {
 	struct request_queue *q = rl->q;
 	struct request *rq;
@@ -1352,8 +1347,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	struct io_cq *icq = NULL;
 	const bool is_sync = op_is_sync(op);
 	int may_queue;
-	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
-			 __GFP_DIRECT_RECLAIM;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
 	lockdep_assert_held(q->queue_lock);
@@ -1517,8 +1510,9 @@ rq_starved:
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @flags: BLK_MQ_REQ_* flags.
+ * @gfp: allocator flags
  *
- * Get a free request from @q.  If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * Get a free request from @q.  If %BLK_MQ_REQ_NOWAIT is set in @flags,
  * this function keeps retrying under memory pressure and fails iff @q is dead.
  *
  * Must be called with @q->queue_lock held and,
@@ -1526,7 +1520,7 @@ rq_starved:
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *get_request(struct request_queue *q, unsigned int op,
-				   struct bio *bio, blk_mq_req_flags_t flags)
+		struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
 {
 	const bool is_sync = op_is_sync(op);
 	DEFINE_WAIT(wait);
@@ -1538,7 +1532,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
 retry:
-	rq = __get_request(rl, op, bio, flags);
+	rq = __get_request(rl, op, bio, flags, gfp);
 	if (!IS_ERR(rq))
 		return rq;
 
@@ -1579,8 +1573,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
 				unsigned int op, blk_mq_req_flags_t flags)
 {
 	struct request *rq;
-	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
-			 __GFP_DIRECT_RECLAIM;
+	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
 	int ret = 0;
 
 	WARN_ON_ONCE(q->mq_ops);
@@ -1592,7 +1585,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
 	if (ret)
 		return ERR_PTR(ret);
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, op, NULL, flags);
+	rq = get_request(q, op, NULL, flags, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		blk_queue_exit(q);
@@ -1607,13 +1600,13 @@ static struct request *blk_old_get_request(struct request_queue *q,
 }
 
 /**
- * blk_get_request_flags - allocate a request
+ * blk_get_request - allocate a request
  * @q: request queue to allocate a request for
  * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
  * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
  */
-struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
-				      blk_mq_req_flags_t flags)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				blk_mq_req_flags_t flags)
 {
 	struct request *req;
 
@@ -1632,14 +1625,6 @@ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
 
 	return req;
 }
-EXPORT_SYMBOL(blk_get_request_flags);
-
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-				gfp_t gfp_mask)
-{
-	return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
-				     0 : BLK_MQ_REQ_NOWAIT);
-}
 EXPORT_SYMBOL(blk_get_request);
 
 /**
@@ -1660,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, &rq->issue_stat);
+	wbt_requeue(q->rq_wb, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1767,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, &req->issue_stat);
+	wbt_done(q->rq_wb, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -2066,7 +2051,7 @@ get_rq:
 	 * Returns with the queue unlocked.
 	 */
 	blk_queue_enter_live(q);
-	req = get_request(q, bio->bi_opf, bio, 0);
+	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
 		__wbt_done(q->rq_wb, wb_acct);
@@ -2078,7 +2063,7 @@ get_rq:
 		goto out_unlock;
 	}
 
-	wbt_track(&req->issue_stat, wb_acct);
+	wbt_track(req, wb_acct);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -2392,7 +2377,9 @@ blk_qc_t generic_make_request(struct bio *bio)
 
 	if (bio->bi_opf & REQ_NOWAIT)
 		flags = BLK_MQ_REQ_NOWAIT;
-	if (blk_queue_enter(q, flags) < 0) {
+	if (bio_flagged(bio, BIO_QUEUE_ENTERED))
+		blk_queue_enter_live(q);
+	else if (blk_queue_enter(q, flags) < 0) {
 		if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
 			bio_wouldblock_error(bio);
 		else
@@ -2727,7 +2714,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
 	}
 }
 
-void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req, u64 now)
 {
 	/*
 	 * Account IO completion.  flush_rq isn't accounted as a
@@ -2735,11 +2722,12 @@ void blk_account_io_done(struct request *req)
 	 * containing request is enough.
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
-		unsigned long duration = jiffies - req->start_time;
+		unsigned long duration;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
 		int cpu;
 
+		duration = nsecs_to_jiffies(now - req->start_time_ns);
 		cpu = part_stat_lock();
 		part = req->part;
 
@@ -2970,10 +2958,8 @@ static void blk_dequeue_request(struct request *rq)
 	 * and it being freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		set_io_start_time_ns(rq);
-	}
 }
 
 /**
@@ -2992,9 +2978,12 @@ void blk_start_request(struct request *req)
 	blk_dequeue_request(req);
 
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
-		blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
+		req->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+		req->throtl_size = blk_rq_sectors(req);
+#endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, &req->issue_stat);
+		wbt_issue(req->q->rq_wb, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3092,8 +3081,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
 		struct bio *bio = req->bio;
 		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_iter.bi_size)
+		if (bio_bytes == bio->bi_iter.bi_size) {
 			req->bio = bio->bi_next;
+			bio->bi_next = NULL;
+		}
 
 		/* Completion has already been traced */
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -3190,12 +3181,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
 void blk_finish_request(struct request *req, blk_status_t error)
 {
 	struct request_queue *q = req->q;
+	u64 now = ktime_get_ns();
 
 	lockdep_assert_held(req->q->queue_lock);
 	WARN_ON_ONCE(q->mq_ops);
 
 	if (req->rq_flags & RQF_STATS)
-		blk_stat_add(req);
+		blk_stat_add(req, now);
 
 	if (req->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, req);
@@ -3210,10 +3202,10 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	if (req->rq_flags & RQF_DONTPREP)
 		blk_unprep_request(req);
 
-	blk_account_io_done(req);
+	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, &req->issue_stat);
+		wbt_done(req->q->rq_wb, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
@@ -3519,7 +3511,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 	struct bio *bio, *bio_src;
 
 	if (!bs)
-		bs = fs_bio_set;
+		bs = &fs_bio_set;
 
 	__rq_for_each_bio(bio_src, rq_src) {
 		bio = bio_clone_fast(bio_src, gfp_mask, bs);
@@ -3630,7 +3622,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 		blk_run_queue_async(q);
 	else
 		__blk_run_queue(q);
-	spin_unlock(q->queue_lock);
+	spin_unlock_irq(q->queue_lock);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3678,7 +3670,6 @@ EXPORT_SYMBOL(blk_check_plugged);
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
-	unsigned long flags;
 	struct request *rq;
 	LIST_HEAD(list);
 	unsigned int depth;
@@ -3698,11 +3689,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	q = NULL;
 	depth = 0;
 
-	/*
-	 * Save and disable interrupts here, to avoid doing it for every
-	 * queue lock we have to take.
-	 */
-	local_irq_save(flags);
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
@@ -3715,7 +3701,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 				queue_unplugged(q, depth, from_schedule);
 			q = rq->q;
 			depth = 0;
-			spin_lock(q->queue_lock);
+			spin_lock_irq(q->queue_lock);
 		}
 
 		/*
@@ -3742,8 +3728,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	 */
 	if (q)
 		queue_unplugged(q, depth, from_schedule);
-
-	local_irq_restore(flags);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
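
Together with the __get_request()/get_request() changes above, blk_get_request() drops its gfp_t argument: callers now pass BLK_MQ_REQ_* flags and the block layer picks GFP_ATOMIC or GFP_NOIO internally. A hedged sketch of a caller after the conversion; the wrapper itself is hypothetical:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Illustrative wrapper: "don't sleep" is now expressed as a flag, not a gfp mask. */
static struct request *my_alloc_drv_rq(struct request_queue *q, bool nowait)
{
	blk_mq_req_flags_t flags = nowait ? BLK_MQ_REQ_NOWAIT : 0;

	return blk_get_request(q, REQ_OP_DRV_IN, flags);
}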

+ 6 - 6
block/blk-integrity.c

@@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
 }
 
 static struct integrity_sysfs_entry integrity_format_entry = {
-	.attr = { .name = "format", .mode = S_IRUGO },
+	.attr = { .name = "format", .mode = 0444 },
 	.show = integrity_format_show,
 };
 
 static struct integrity_sysfs_entry integrity_tag_size_entry = {
-	.attr = { .name = "tag_size", .mode = S_IRUGO },
+	.attr = { .name = "tag_size", .mode = 0444 },
 	.show = integrity_tag_size_show,
 };
 
 static struct integrity_sysfs_entry integrity_interval_entry = {
-	.attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
+	.attr = { .name = "protection_interval_bytes", .mode = 0444 },
 	.show = integrity_interval_show,
 };
 
 static struct integrity_sysfs_entry integrity_verify_entry = {
-	.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+	.attr = { .name = "read_verify", .mode = 0644 },
 	.show = integrity_verify_show,
 	.store = integrity_verify_store,
 };
 
 static struct integrity_sysfs_entry integrity_generate_entry = {
-	.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+	.attr = { .name = "write_generate", .mode = 0644 },
 	.show = integrity_generate_show,
 	.store = integrity_generate_store,
 };
 
 static struct integrity_sysfs_entry integrity_device_entry = {
-	.attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
+	.attr = { .name = "device_is_integrity_capable", .mode = 0444 },
 	.show = integrity_device_show,
 };
 

+ 9 - 3
block/blk-lib.c

@@ -62,10 +62,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		unsigned int req_sects;
 		sector_t end_sect, tmp;
 
-		/* Make sure bi_size doesn't overflow */
-		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
+		/*
+		 * Issue in chunks of the user-defined max discard setting,
+		 * ensuring that bi_size doesn't overflow
+		 */
+		req_sects = min_t(sector_t, nr_sects,
+					q->limits.max_discard_sectors);
+		if (req_sects > UINT_MAX >> 9)
+			req_sects = UINT_MAX >> 9;
 
-		/**
+		/*
 		 * If splitting a request, and the next starting sector would be
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
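
The hunk above chunks each discard by the queue's max_discard_sectors while still guarding bi_size, a 32-bit byte count, against overflow. A standalone userspace illustration of that clamping arithmetic, assuming 512-byte sectors:

#include <stdint.h>

static uint64_t discard_chunk_sectors(uint64_t nr_sects,
				      uint64_t max_discard_sectors)
{
	uint64_t req_sects = nr_sects < max_discard_sectors ?
			     nr_sects : max_discard_sectors;

	/* bi_size holds bytes in 32 bits, so never exceed UINT32_MAX >> 9 sectors. */
	if (req_sects > (UINT32_MAX >> 9))
		req_sects = UINT32_MAX >> 9;
	return req_sects;
}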

+ 19 - 10
block/blk-merge.c

@@ -188,16 +188,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	default:
-		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	}
 
@@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
 		/* there is no chance to merge the split bio */
 		split->bi_opf |= REQ_NOMERGE;
 
+		/*
+		 * Since we're recursing into make_request here, ensure
+		 * that we mark this bio as already having entered the queue.
+		 * If not, and the queue is going away, we can get stuck
+		 * forever on waiting for the queue reference to drop. But
+		 * forever waiting for the queue reference to drop. But
+		 * reference to it.
+		 */
+		bio_set_flag(*bio, BIO_QUEUE_ENTERED);
+
 		bio_chain(split, *bio);
 		trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
 		generic_make_request(*bio);
@@ -724,13 +734,12 @@ static struct request *attempt_merge(struct request_queue *q,
 	}
 
 	/*
-	 * At this point we have either done a back merge
-	 * or front merge. We need the smaller start_time of
-	 * the merged requests to be the current request
-	 * for accounting purposes.
+	 * At this point we have either done a back merge or front merge. We
+	 * need the smaller start_time_ns of the merged requests to be the
+	 * current request for accounting purposes.
 	 */
-	if (time_after(req->start_time, next->start_time))
-		req->start_time = next->start_time;
+	if (next->start_time_ns < req->start_time_ns)
+		req->start_time_ns = next->start_time_ns;
 
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;

+ 0 - 1
block/blk-mq-debugfs.c

@@ -344,7 +344,6 @@ static const char *const rqf_name[] = {
 	RQF_NAME(STATS),
 	RQF_NAME(SPECIAL_PAYLOAD),
 	RQF_NAME(ZONE_WRITE_LOCKED),
-	RQF_NAME(MQ_TIMEOUT_EXPIRED),
 	RQF_NAME(MQ_POLL_SLEPT),
 };
 #undef RQF_NAME

+ 25 - 21
block/blk-mq-sched.c

@@ -268,19 +268,16 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
 /*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
+ * Iterate list of requests and see if we can merge this bio with any
+ * of them.
  */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-				 struct blk_mq_ctx *ctx, struct bio *bio)
+bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
+			   struct bio *bio)
 {
 	struct request *rq;
 	int checked = 8;
 
-	lockdep_assert_held(&ctx->lock);
-
-	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+	list_for_each_entry_reverse(rq, list, queuelist) {
 		bool merged = false;
 
 		if (!checked--)
@@ -305,13 +302,30 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
 			continue;
 		}
 
-		if (merged)
-			ctx->rq_merged++;
 		return merged;
 	}
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
+
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	lockdep_assert_held(&ctx->lock);
+
+	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+		ctx->rq_merged++;
+		return true;
+	}
+
+	return false;
+}
 
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
@@ -571,6 +585,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	if (!e) {
 		q->elevator = NULL;
+		q->nr_requests = q->tag_set->queue_depth;
 		return 0;
 	}
 
@@ -633,14 +648,3 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	blk_mq_sched_tags_teardown(q);
 	q->elevator = NULL;
 }
-
-int blk_mq_sched_init(struct request_queue *q)
-{
-	int ret;
-
-	mutex_lock(&q->sysfs_lock);
-	ret = elevator_init(q, NULL);
-	mutex_unlock(&q->sysfs_lock);
-
-	return ret;
-}
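
blk_mq_bio_list_merge() is split out and exported above so an I/O scheduler (the Kyber merge work mentioned in the changelog) can run the same reverse-scan merge over its own request lists. A rough sketch of such a caller; the list, lock, and function name are hypothetical scheduler state, and the real hook signature may differ:

#include <linux/blk-mq.h>
#include <linux/spinlock.h>

static bool my_sched_try_merge(struct request_queue *q, struct bio *bio,
			       struct list_head *my_rq_list,
			       spinlock_t *my_lock)
{
	bool merged;

	/* Same helper the software-queue path uses, applied to scheduler-owned state. */
	spin_lock(my_lock);
	merged = blk_mq_bio_list_merge(q, my_rq_list, bio);
	spin_unlock(my_lock);

	return merged;
}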

+ 0 - 2
block/blk-mq-sched.h

@@ -33,8 +33,6 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 			    unsigned int hctx_idx);
 
-int blk_mq_sched_init(struct request_queue *q);
-
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {

+ 3 - 3
block/blk-mq-sysfs.c

@@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = {
 };
 
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
-	.attr = {.name = "nr_tags", .mode = S_IRUGO },
+	.attr = {.name = "nr_tags", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_nr_tags_show,
 };
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
-	.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
+	.attr = {.name = "nr_reserved_tags", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
 };
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
-	.attr = {.name = "cpu_list", .mode = S_IRUGO },
+	.attr = {.name = "cpu_list", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_cpus_show,
 };
 

+ 13 - 1
block/blk-mq-tag.c

@@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	ws = bt_wait_ptr(bt, data->hctx);
 	drop_ctx = data->ctx == NULL;
 	do {
+		struct sbitmap_queue *bt_prev;
+
 		/*
 		 * We're out of tags on this hardware queue, kick any
 		 * pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (data->ctx)
 			blk_mq_put_ctx(data->ctx);
 
+		bt_prev = bt;
 		io_schedule();
 
 		data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 			bt = &tags->bitmap_tags;
 
 		finish_wait(&ws->wait, &wait);
+
+		/*
+		 * If the destination hw queue changed, fake a wake-up on the
+		 * previous queue to compensate for the missed wake-up, so
+		 * other allocations on the previous queue won't be starved.
+		 */
+		if (bt != bt_prev)
+			sbitmap_queue_wake_up(bt_prev);
+
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
@@ -259,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * test and set the bit before assigning ->rqs[].
 	 */
 	rq = tags->rqs[bitnr];
-	if (rq)
+	if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
 		iter_data->fn(rq, iter_data->data, reserved);
 
 	return true;

+ 108 - 232
block/blk-mq.c

@@ -309,7 +309,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
-	rq->start_time = jiffies;
+	rq->start_time_ns = ktime_get_ns();
+	rq->io_start_time_ns = 0;
 	rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
@@ -328,11 +329,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
-	set_start_time_ns(rq);
-	rq->io_start_time_ns = 0;
 #endif
 
 	data->ctx->rq_dispatched[op_is_sync(op)]++;
+	refcount_set(&rq->ref, 1);
 	return rq;
 }
 
@@ -361,9 +361,11 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 
 		/*
 		 * Flush requests are special and go directly to the
-		 * dispatch list.
+		 * dispatch list. Don't include reserved tags in the
+		 * limiting, as it isn't useful.
 		 */
-		if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+		if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
+		    !(data->flags & BLK_MQ_REQ_RESERVED))
 			e->type->ops.mq.limit_depth(op, data);
 	}
 
@@ -464,13 +466,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void __blk_mq_free_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	const int sched_tag = rq->internal_tag;
+
+	if (rq->tag != -1)
+		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+	if (sched_tag != -1)
+		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+	blk_mq_sched_restart(hctx);
+	blk_queue_exit(q);
+}
+
 void blk_mq_free_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	const int sched_tag = rq->internal_tag;
 
 	if (rq->rq_flags & RQF_ELVPRIV) {
 		if (e && e->type->ops.mq.finish_request)
@@ -488,27 +504,30 @@ void blk_mq_free_request(struct request *rq)
 	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
 		laptop_io_completion(q->backing_dev_info);
 
-	wbt_done(q->rq_wb, &rq->issue_stat);
+	wbt_done(q->rq_wb, rq);
 
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
 
-	blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
-	if (rq->tag != -1)
-		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
-	if (sched_tag != -1)
-		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
-	blk_mq_sched_restart(hctx);
-	blk_queue_exit(q);
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
-	blk_account_io_done(rq);
+	u64 now = ktime_get_ns();
+
+	if (rq->rq_flags & RQF_STATS) {
+		blk_mq_poll_stats_start(rq->q);
+		blk_stat_add(rq, now);
+	}
+
+	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
-		wbt_done(rq->q->rq_wb, &rq->issue_stat);
+		wbt_done(rq->q->rq_wb, rq);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -539,15 +558,12 @@ static void __blk_mq_complete_request(struct request *rq)
 	bool shared = false;
 	int cpu;
 
-	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
-	blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
+			MQ_RQ_IN_FLIGHT)
+		return;
 
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
-	if (rq->rq_flags & RQF_STATS) {
-		blk_mq_poll_stats_start(rq->q);
-		blk_stat_add(rq);
-	}
 
 	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 		rq->q->softirq_done_fn(rq);
@@ -589,36 +605,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
 		*srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
-{
-	unsigned long flags;
-
-	/*
-	 * blk_mq_rq_aborted_gstate() is used from the completion path and
-	 * can thus be called from irq context.  u64_stats_fetch in the
-	 * middle of update on the same CPU leads to lockup.  Disable irq
-	 * while updating.
-	 */
-	local_irq_save(flags);
-	u64_stats_update_begin(&rq->aborted_gstate_sync);
-	rq->aborted_gstate = gstate;
-	u64_stats_update_end(&rq->aborted_gstate_sync);
-	local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
-	unsigned int start;
-	u64 aborted_gstate;
-
-	do {
-		start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
-		aborted_gstate = rq->aborted_gstate;
-	} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
-
-	return aborted_gstate;
-}
-
 /**
  * blk_mq_complete_request - end I/O on a request
  * @rq:		the request being processed
@@ -629,28 +615,9 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
  **/
 void blk_mq_complete_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
-	int srcu_idx;
-
-	if (unlikely(blk_should_fake_timeout(q)))
+	if (unlikely(blk_should_fake_timeout(rq->q)))
 		return;
-
-	/*
-	 * If @rq->aborted_gstate equals the current instance, timeout is
-	 * claiming @rq and we lost.  This is synchronized through
-	 * hctx_lock().  See blk_mq_timeout_work() for details.
-	 *
-	 * Completion path never blocks and we can directly use RCU here
-	 * instead of hctx_lock() which can be either RCU or SRCU.
-	 * However, that would complicate paths which want to synchronize
-	 * against us.  Let stay in sync with the issue path so that
-	 * hctx_lock() covers both issue and completion paths.
-	 */
-	hctx_lock(hctx, &srcu_idx);
-	if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
-		__blk_mq_complete_request(rq);
-	hctx_unlock(hctx, srcu_idx);
+	__blk_mq_complete_request(rq);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
@@ -669,32 +636,18 @@ void blk_mq_start_request(struct request *rq)
 	trace_block_rq_issue(q, rq);
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-		blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
+		rq->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+		rq->throtl_size = blk_rq_sectors(rq);
+#endif
 		rq->rq_flags |= RQF_STATS;
-		wbt_issue(q->rq_wb, &rq->issue_stat);
+		wbt_issue(q->rq_wb, rq);
 	}
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
 
-	/*
-	 * Mark @rq in-flight which also advances the generation number,
-	 * and register for timeout.  Protect with a seqcount to allow the
-	 * timeout path to read both @rq->gstate and @rq->deadline
-	 * coherently.
-	 *
-	 * This is the only place where a request is marked in-flight.  If
-	 * the timeout path reads an in-flight @rq->gstate, the
-	 * @rq->deadline it reads together under @rq->gstate_seq is
-	 * guaranteed to be the matching one.
-	 */
-	preempt_disable();
-	write_seqcount_begin(&rq->gstate_seq);
-
-	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
 	blk_add_timer(rq);
-
-	write_seqcount_end(&rq->gstate_seq);
-	preempt_enable();
+	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
 
 	if (q->dma_drain_size && blk_rq_bytes(rq)) {
 		/*
@@ -707,11 +660,6 @@ void blk_mq_start_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -719,10 +667,10 @@ static void __blk_mq_requeue_request(struct request *rq)
 	blk_mq_put_driver_tag(rq);
 
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, &rq->issue_stat);
+	wbt_requeue(q->rq_wb, rq);
 
-	if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
-		blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+	if (blk_mq_request_started(rq)) {
+		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 		if (q->dma_drain_size && blk_rq_bytes(rq))
 			rq->nr_phys_segments--;
 	}
@@ -820,101 +768,79 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
-struct blk_mq_timeout_data {
-	unsigned long next;
-	unsigned int next_set;
-	unsigned int nr_expired;
-};
-
 static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
-	const struct blk_mq_ops *ops = req->q->mq_ops;
-	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
-
-	req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
-
-	if (ops->timeout)
-		ret = ops->timeout(req, reserved);
+	if (req->q->mq_ops->timeout) {
+		enum blk_eh_timer_return ret;
 
-	switch (ret) {
-	case BLK_EH_HANDLED:
-		__blk_mq_complete_request(req);
-		break;
-	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
-		blk_add_timer(req);
-		break;
-	case BLK_EH_NOT_HANDLED:
-		break;
-	default:
-		printk(KERN_ERR "block: bad eh return: %d\n", ret);
-		break;
+		ret = req->q->mq_ops->timeout(req, reserved);
+		if (ret == BLK_EH_DONE)
+			return;
+		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
 	}
+
+	blk_add_timer(req);
 }
 
-static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, void *priv, bool reserved)
+static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 {
-	struct blk_mq_timeout_data *data = priv;
-	unsigned long gstate, deadline;
-	int start;
+	unsigned long deadline;
 
-	might_sleep();
-
-	if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
-		return;
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return false;
 
-	/* read coherent snapshots of @rq->state_gen and @rq->deadline */
-	while (true) {
-		start = read_seqcount_begin(&rq->gstate_seq);
-		gstate = READ_ONCE(rq->gstate);
-		deadline = blk_rq_deadline(rq);
-		if (!read_seqcount_retry(&rq->gstate_seq, start))
-			break;
-		cond_resched();
-	}
+	deadline = blk_rq_deadline(rq);
+	if (time_after_eq(jiffies, deadline))
+		return true;
 
-	/* if in-flight && overdue, mark for abortion */
-	if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
-	    time_after_eq(jiffies, deadline)) {
-		blk_mq_rq_update_aborted_gstate(rq, gstate);
-		data->nr_expired++;
-		hctx->nr_expired++;
-	} else if (!data->next_set || time_after(data->next, deadline)) {
-		data->next = deadline;
-		data->next_set = 1;
-	}
+	if (*next == 0)
+		*next = deadline;
+	else if (time_after(*next, deadline))
+		*next = deadline;
+	return false;
 }
 
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
+	unsigned long *next = priv;
+
 	/*
-	 * We marked @rq->aborted_gstate and waited for RCU.  If there were
-	 * completions that we lost to, they would have finished and
-	 * updated @rq->gstate by now; otherwise, the completion path is
-	 * now guaranteed to see @rq->aborted_gstate and yield.  If
-	 * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
+	 * Just do a quick check if it is expired before locking the request in
+	 * so we're not unnecessarily synchronizing across CPUs.
+	 */
+	if (!blk_mq_req_expired(rq, next))
+		return;
+
+	/*
+	 * We have reason to believe the request may be expired. Take a
+	 * reference on the request to lock this request's lifetime into its
+	 * currently allocated context to prevent it from being reallocated in
+	 * the event the completion bypasses this timeout handler.
+	 *
+	 * If the reference was already released, then the driver beat the
+	 * timeout handler to posting a natural completion.
 	 */
-	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
-	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
+	if (!refcount_inc_not_zero(&rq->ref))
+		return;
+
+	/*
+	 * The request is now locked and cannot be reallocated underneath the
+	 * timeout handler's processing. Re-verify this exact request is truly
+	 * expired; if it is not expired, then the request was completed and
+	 * reallocated as a new request.
+	 */
+	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, timeout_work);
-	struct blk_mq_timeout_data data = {
-		.next		= 0,
-		.next_set	= 0,
-		.nr_expired	= 0,
-	};
+	unsigned long next = 0;
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
@@ -934,39 +860,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
-	/* scan for the expired ones and set their ->aborted_gstate */
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
-
-	if (data.nr_expired) {
-		bool has_rcu = false;
-
-		/*
-		 * Wait till everyone sees ->aborted_gstate.  The
-		 * sequential waits for SRCUs aren't ideal.  If this ever
-		 * becomes a problem, we can add per-hw_ctx rcu_head and
-		 * wait in parallel.
-		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
-
-		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
-	}
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
 
-	if (data.next_set) {
-		data.next = blk_rq_timeout(round_jiffies_up(data.next));
-		mod_timer(&q->timeout, data.next);
+	if (next != 0) {
+		mod_timer(&q->timeout, next);
 	} else {
 		/*
 		 * Request timeouts are handled as a forward rolling timer. If
@@ -1029,7 +926,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
 
 	spin_lock(&ctx->lock);
-	if (unlikely(!list_empty(&ctx->rq_list))) {
+	if (!list_empty(&ctx->rq_list)) {
 		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
 		list_del_init(&dispatch_data->rq->queuelist);
 		if (list_empty(&ctx->rq_list))
@@ -1716,15 +1613,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 	blk_account_io_start(rq, true);
 }
 
-static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
-				   struct blk_mq_ctx *ctx,
-				   struct request *rq)
-{
-	spin_lock(&ctx->lock);
-	__blk_mq_insert_request(hctx, rq, false);
-	spin_unlock(&ctx->lock);
-}
-
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	if (rq->tag != -1)
@@ -1882,7 +1770,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	wbt_track(&rq->issue_stat, wb_acct);
+	wbt_track(rq, wb_acct);
 
 	cookie = request_to_qc_t(data.hctx, rq);
 
@@ -1949,15 +1837,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
-	} else if (q->elevator) {
-		blk_mq_put_ctx(data.ctx);
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_sched_insert_request(rq, false, true, true);
 	} else {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_queue_io(data.hctx, data.ctx, rq);
-		blk_mq_run_hw_queue(data.hctx, true);
+		blk_mq_sched_insert_request(rq, false, true, true);
 	}
 
 	return cookie;
@@ -2056,15 +1939,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * start gstate with gen 1 instead of 0, otherwise it will be equal
-	 * to aborted_gstate, and be identified timed out by
-	 * blk_mq_terminate_expired.
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
-
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 	return 0;
 }
 
@@ -2365,6 +2240,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
+		hctx->dispatch_from = NULL;
 	}
 
 	/*
@@ -2697,7 +2573,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
 
-		ret = blk_mq_sched_init(q);
+		ret = elevator_init_mq(q);
 		if (ret)
 			return ERR_PTR(ret);
 	}
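
The timeout rework earlier in this file (blk_mq_check_expired above) pins a request with refcount_inc_not_zero() before re-checking expiry, so a racing natural completion either wins outright or leaves the timeout path holding the last reference. A self-contained sketch of that pattern on a hypothetical object, not struct request:

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct my_obj {
	refcount_t ref;
	unsigned long deadline;
};

static bool my_obj_expired(struct my_obj *obj)
{
	return time_after_eq(jiffies, obj->deadline);
}

static void my_check_timeout(struct my_obj *obj)
{
	/* Pin the object only if it is still live; otherwise completion won. */
	if (!refcount_inc_not_zero(&obj->ref))
		return;

	/* Re-verify now that the object cannot be freed underneath us. */
	if (my_obj_expired(obj))
		pr_warn("my_obj %p timed out\n", obj);

	/* Drop the pin; free if completion already dropped its reference. */
	if (refcount_dec_and_test(&obj->ref))
		kfree(obj);
}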

+ 2 - 40
block/blk-mq.h

@@ -30,20 +30,6 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
-/*
- * Bits for request->gstate.  The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
-enum mq_rq_state {
-	MQ_RQ_IDLE		= 0,
-	MQ_RQ_IN_FLIGHT		= 1,
-	MQ_RQ_COMPLETE		= 2,
-
-	MQ_RQ_STATE_BITS	= 2,
-	MQ_RQ_STATE_MASK	= (1 << MQ_RQ_STATE_BITS) - 1,
-	MQ_RQ_GEN_INC		= 1 << MQ_RQ_STATE_BITS,
-};
-
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
@@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q);
  * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
  * @rq: target request.
  */
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
 {
-	return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
-}
-
-/**
- * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
- * @rq: target request.
- * @state: new state to set.
- *
- * Set @rq's state to @state.  The caller is responsible for ensuring that
- * there are no other updaters.  A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
- */
-static inline void blk_mq_rq_update_state(struct request *rq,
-					  enum mq_rq_state state)
-{
-	u64 old_val = READ_ONCE(rq->gstate);
-	u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
-	if (state == MQ_RQ_IN_FLIGHT) {
-		WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
-		new_val += MQ_RQ_GEN_INC;
-	}
-
-	/* avoid exposing interim values */
-	WRITE_ONCE(rq->gstate, new_val);
+	return READ_ONCE(rq->state);
 }
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,

+ 3 - 7
block/blk-stat.c

@@ -47,19 +47,15 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
 	stat->nr_samples++;
 }
 
-void blk_stat_add(struct request *rq)
+void blk_stat_add(struct request *rq, u64 now)
 {
 	struct request_queue *q = rq->q;
 	struct blk_stat_callback *cb;
 	struct blk_rq_stat *stat;
 	int bucket;
-	u64 now, value;
+	u64 value;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
-		return;
-
-	value = now - blk_stat_time(&rq->issue_stat);
+	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
 
 	blk_throtl_stat_add(rq, value);
 

+ 1 - 44
block/blk-stat.h

@@ -8,21 +8,6 @@
 #include <linux/rcupdate.h>
 #include <linux/timer.h>
 
-/*
- * from upper:
- * 3 bits: reserved for other usage
- * 12 bits: size
- * 49 bits: time
- */
-#define BLK_STAT_RES_BITS	3
-#define BLK_STAT_SIZE_BITS	12
-#define BLK_STAT_RES_SHIFT	(64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_SIZE_SHIFT	(BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
-#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
-#define BLK_STAT_SIZE_MASK	\
-	(((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
-#define BLK_STAT_RES_MASK	(~((1ULL << BLK_STAT_RES_SHIFT) - 1))
-
 /**
  * struct blk_stat_callback - Block statistics callback.
  *
@@ -80,35 +65,7 @@ struct blk_stat_callback {
 struct blk_queue_stats *blk_alloc_queue_stats(void);
 void blk_free_queue_stats(struct blk_queue_stats *);
 
-void blk_stat_add(struct request *);
-
-static inline u64 __blk_stat_time(u64 time)
-{
-	return time & BLK_STAT_TIME_MASK;
-}
-
-static inline u64 blk_stat_time(struct blk_issue_stat *stat)
-{
-	return __blk_stat_time(stat->stat);
-}
-
-static inline sector_t blk_capped_size(sector_t size)
-{
-	return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
-}
-
-static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
-{
-	return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
-}
-
-static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
-	sector_t size)
-{
-	stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
-		(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
-		(((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
-}
+void blk_stat_add(struct request *rq, u64 now);
 
 /* record time/size info in request but not add a callback */
 void blk_stat_enable_accounting(struct request_queue *q);
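
For context on the blk-stat simplification above: the removed issue_stat encoding packed a 49-bit timestamp and a 12-bit size into one u64, which the new plain start_time_ns/io_start_time_ns fields and rq->throtl_size make unnecessary. A quick userspace calculation of what those packed widths allowed, assuming 512-byte sectors:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_ns = (1ULL << 49) - 1;		/* 49-bit timestamp */
	uint64_t max_sectors = (1ULL << 12) - 1;	/* 12-bit size */

	printf("timestamp range: %llu ns (~%.1f days)\n",
	       (unsigned long long)max_ns, max_ns / 86400e9);
	printf("size cap: %llu sectors (%llu KiB)\n",
	       (unsigned long long)max_sectors,
	       (unsigned long long)(max_sectors * 512 / 1024));
	return 0;
}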

+ 45 - 35
block/blk-sysfs.c

@@ -491,188 +491,198 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
 	return count;
 }
 
+static ssize_t queue_fua_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+}
+
 static ssize_t queue_dax_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
 static struct queue_sysfs_entry queue_requests_entry = {
-	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nr_requests", .mode = 0644 },
 	.show = queue_requests_show,
 	.store = queue_requests_store,
 };
 
 static struct queue_sysfs_entry queue_ra_entry = {
-	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "read_ahead_kb", .mode = 0644 },
 	.show = queue_ra_show,
 	.store = queue_ra_store,
 };
 
 static struct queue_sysfs_entry queue_max_sectors_entry = {
-	.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "max_sectors_kb", .mode = 0644 },
 	.show = queue_max_sectors_show,
 	.store = queue_max_sectors_store,
 };
 
 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
-	.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+	.attr = {.name = "max_hw_sectors_kb", .mode = 0444 },
 	.show = queue_max_hw_sectors_show,
 };
 
 static struct queue_sysfs_entry queue_max_segments_entry = {
-	.attr = {.name = "max_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_segments", .mode = 0444 },
 	.show = queue_max_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_discard_segments_entry = {
-	.attr = {.name = "max_discard_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_discard_segments", .mode = 0444 },
 	.show = queue_max_discard_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
-	.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_integrity_segments", .mode = 0444 },
 	.show = queue_max_integrity_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_segment_size_entry = {
-	.attr = {.name = "max_segment_size", .mode = S_IRUGO },
+	.attr = {.name = "max_segment_size", .mode = 0444 },
 	.show = queue_max_segment_size_show,
 };
 
 static struct queue_sysfs_entry queue_iosched_entry = {
-	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "scheduler", .mode = 0644 },
 	.show = elv_iosched_show,
 	.store = elv_iosched_store,
 };
 
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
-	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
+	.attr = {.name = "hw_sector_size", .mode = 0444 },
 	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_logical_block_size_entry = {
-	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "logical_block_size", .mode = 0444 },
 	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_physical_block_size_entry = {
-	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "physical_block_size", .mode = 0444 },
 	.show = queue_physical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_chunk_sectors_entry = {
-	.attr = {.name = "chunk_sectors", .mode = S_IRUGO },
+	.attr = {.name = "chunk_sectors", .mode = 0444 },
 	.show = queue_chunk_sectors_show,
 };
 
 static struct queue_sysfs_entry queue_io_min_entry = {
-	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.attr = {.name = "minimum_io_size", .mode = 0444 },
 	.show = queue_io_min_show,
 };
 
 static struct queue_sysfs_entry queue_io_opt_entry = {
-	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.attr = {.name = "optimal_io_size", .mode = 0444 },
 	.show = queue_io_opt_show,
 };
 
 static struct queue_sysfs_entry queue_discard_granularity_entry = {
-	.attr = {.name = "discard_granularity", .mode = S_IRUGO },
+	.attr = {.name = "discard_granularity", .mode = 0444 },
 	.show = queue_discard_granularity_show,
 };
 
 static struct queue_sysfs_entry queue_discard_max_hw_entry = {
-	.attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+	.attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
 	.show = queue_discard_max_hw_show,
 };
 
 static struct queue_sysfs_entry queue_discard_max_entry = {
-	.attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "discard_max_bytes", .mode = 0644 },
 	.show = queue_discard_max_show,
 	.store = queue_discard_max_store,
 };
 
 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
-	.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+	.attr = {.name = "discard_zeroes_data", .mode = 0444 },
 	.show = queue_discard_zeroes_data_show,
 };
 
 static struct queue_sysfs_entry queue_write_same_max_entry = {
-	.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_same_max_bytes", .mode = 0444 },
 	.show = queue_write_same_max_show,
 };
 
 static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
-	.attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
 	.show = queue_write_zeroes_max_show,
 };
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
-	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rotational", .mode = 0644 },
 	.show = queue_show_nonrot,
 	.store = queue_store_nonrot,
 };
 
 static struct queue_sysfs_entry queue_zoned_entry = {
-	.attr = {.name = "zoned", .mode = S_IRUGO },
+	.attr = {.name = "zoned", .mode = 0444 },
 	.show = queue_zoned_show,
 };
 
 static struct queue_sysfs_entry queue_nomerges_entry = {
-	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
 	.store = queue_nomerges_store,
 };
 
 static struct queue_sysfs_entry queue_rq_affinity_entry = {
-	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rq_affinity", .mode = 0644 },
 	.show = queue_rq_affinity_show,
 	.store = queue_rq_affinity_store,
 };
 
 static struct queue_sysfs_entry queue_iostats_entry = {
-	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "iostats", .mode = 0644 },
 	.show = queue_show_iostats,
 	.store = queue_store_iostats,
 };
 
 static struct queue_sysfs_entry queue_random_entry = {
-	.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "add_random", .mode = 0644 },
 	.show = queue_show_random,
 	.store = queue_store_random,
 };
 
 static struct queue_sysfs_entry queue_poll_entry = {
-	.attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll", .mode = 0644 },
 	.show = queue_poll_show,
 	.store = queue_poll_store,
 };
 
 static struct queue_sysfs_entry queue_poll_delay_entry = {
-	.attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll_delay", .mode = 0644 },
 	.show = queue_poll_delay_show,
 	.store = queue_poll_delay_store,
 };
 
 static struct queue_sysfs_entry queue_wc_entry = {
-	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "write_cache", .mode = 0644 },
 	.show = queue_wc_show,
 	.store = queue_wc_store,
 };
 
+static struct queue_sysfs_entry queue_fua_entry = {
+	.attr = {.name = "fua", .mode = 0444 },
+	.show = queue_fua_show,
+};
+
 static struct queue_sysfs_entry queue_dax_entry = {
-	.attr = {.name = "dax", .mode = S_IRUGO },
+	.attr = {.name = "dax", .mode = 0444 },
 	.show = queue_dax_show,
 };
 
 static struct queue_sysfs_entry queue_wb_lat_entry = {
-	.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "wbt_lat_usec", .mode = 0644 },
 	.show = queue_wb_lat_show,
 	.store = queue_wb_lat_store,
 };
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static struct queue_sysfs_entry throtl_sample_time_entry = {
-	.attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "throttle_sample_time", .mode = 0644 },
 	.show = blk_throtl_sample_time_show,
 	.store = blk_throtl_sample_time_store,
 };
@@ -708,6 +718,7 @@ static struct attribute *default_attrs[] = {
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
+	&queue_fua_entry.attr,
 	&queue_dax_entry.attr,
 	&queue_wb_lat_entry.attr,
 	&queue_poll_delay_entry.attr,
@@ -813,8 +824,7 @@ static void __blk_release_queue(struct work_struct *work)
 	if (q->mq_ops)
 		blk_mq_debugfs_unregister(q);
 
-	if (q->bio_split)
-		bioset_free(q->bio_split);
+	bioset_exit(&q->bio_split);
 
 	ida_simple_remove(&blk_queue_ida, q->id);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
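
Besides the octal permission conversion noted in the changelog, the blk-sysfs.c hunks add a read-only "fua" queue attribute next to "write_cache". A trivial userspace check of the new file; the device path is only an example:

#include <stdio.h>

int main(void)
{
	char buf[8];
	FILE *f = fopen("/sys/block/sda/queue/fua", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("fua: %s", buf);	/* "1\n" when the device honors FUA writes */
	if (f)
		fclose(f);
	return 0;
}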

+ 16 - 19
block/blk-throttle.c

@@ -36,8 +36,6 @@ static int throtl_quantum = 32;
  */
 #define LATENCY_FILTERED_HD (1000L) /* 1ms */
 
-#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
-
 static struct blkcg_policy blkcg_policy_throtl;
 
 /* A workqueue to queue throttle related work */
@@ -821,7 +819,7 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
 	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
 		return false;
 
-	return 1;
+	return true;
 }
 
 /* Trim the used slices and adjust slice start accordingly */
@@ -931,7 +929,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 
 	if (wait)
 		*wait = jiffy_wait;
-	return 0;
+	return false;
 }
 
 static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
@@ -974,7 +972,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
 	if (wait)
 		*wait = jiffy_wait;
-	return 0;
+	return false;
 }
 
 /*
@@ -1024,7 +1022,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	    tg_with_in_iops_limit(tg, bio, &iops_wait)) {
 		if (wait)
 			*wait = 0;
-		return 1;
+		return true;
 	}
 
 	max_wait = max(bps_wait, iops_wait);
@@ -1035,7 +1033,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	if (time_before(tg->slice_end[rw], jiffies + max_wait))
 		throtl_extend_slice(tg, rw, jiffies + max_wait);
 
-	return 0;
+	return false;
 }
 
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
@@ -1209,7 +1207,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
 
 	while (1) {
 		struct throtl_grp *tg = throtl_rb_first(parent_sq);
-		struct throtl_service_queue *sq = &tg->service_queue;
+		struct throtl_service_queue *sq;
 
 		if (!tg)
 			break;
@@ -1221,6 +1219,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
 
 		nr_disp += throtl_dispatch_tg(tg);
 
+		sq = &tg->service_queue;
 		if (sq->nr_queued[0] || sq->nr_queued[1])
 			tg_update_disptime(tg);
 
@@ -2139,7 +2138,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 		bio->bi_cg_private = tg;
 		blkg_get(tg_to_blkg(tg));
 	}
-	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }
 
@@ -2251,7 +2250,7 @@ out:
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
-		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
 	return throttled;
 }
@@ -2281,8 +2280,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 	struct request_queue *q = rq->q;
 	struct throtl_data *td = q->td;
 
-	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
-		req_op(rq), time_ns >> 10);
+	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
 }
 
 void blk_throtl_bio_endio(struct bio *bio)
@@ -2302,8 +2300,8 @@ void blk_throtl_bio_endio(struct bio *bio)
 	finish_time_ns = ktime_get_ns();
 	tg->last_finish_time = finish_time_ns >> 10;
 
-	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
-	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	start_time = bio_issue_time(&bio->bi_issue) >> 10;
+	finish_time = __bio_issue_time(finish_time_ns) >> 10;
 	if (!start_time || finish_time <= start_time) {
 		blkg_put(tg_to_blkg(tg));
 		return;
@@ -2311,16 +2309,15 @@ void blk_throtl_bio_endio(struct bio *bio)
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
-	if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
-		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
-			bio_op(bio), lat);
+	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
+		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
+				     bio_op(bio), lat);
 
 	if (tg->latency_target && lat >= tg->td->filtered_latency) {
 		int bucket;
 		unsigned int threshold;
 
-		bucket = request_bucket_index(
-			blk_stat_size(&bio->bi_issue_stat));
+		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
 		threshold = tg->td->avg_buckets[rw][bucket].latency +
 			tg->latency_target;
 		if (lat > threshold)
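
The blk-throttle hunks above are part of removing struct blk_issue_stat: the issue timestamp, the I/O size and the throttle skip-latency flag now travel in one packed 64-bit bio_issue value (bio_issue_init(), bio_issue_time(), bio_issue_size(), BIO_ISSUE_THROTL_SKIP_LATENCY). The standalone C sketch below only illustrates the packing idea; the field widths and helper names are invented for the example and are not the kernel's actual bio_issue layout.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative field widths only -- not the kernel's real bio_issue layout. */
    #define ISSUE_TIME_BITS    48
    #define ISSUE_SIZE_BITS    12
    #define ISSUE_TIME_MASK    ((UINT64_C(1) << ISSUE_TIME_BITS) - 1)
    #define ISSUE_SIZE_MASK    ((UINT64_C(1) << ISSUE_SIZE_BITS) - 1)
    #define ISSUE_SKIP_LATENCY (UINT64_C(1) << 63)

    struct issue { uint64_t value; };

    /* Pack a (truncated) timestamp and a size into one word, in the spirit of
     * bio_issue_init() filling bio->bi_issue. */
    static void issue_init(struct issue *is, uint64_t time_ns, uint64_t size)
    {
        is->value = (time_ns & ISSUE_TIME_MASK) |
                    ((size & ISSUE_SIZE_MASK) << ISSUE_TIME_BITS);
    }

    static uint64_t issue_time(const struct issue *is)
    {
        return is->value & ISSUE_TIME_MASK;
    }

    static uint64_t issue_size(const struct issue *is)
    {
        return (is->value >> ISSUE_TIME_BITS) & ISSUE_SIZE_MASK;
    }

    int main(void)
    {
        struct issue is;

        issue_init(&is, 123456789ULL, 8);   /* "issued at t", 8 sectors */
        is.value |= ISSUE_SKIP_LATENCY;     /* cf. BIO_ISSUE_THROTL_SKIP_LATENCY */

        assert(issue_time(&is) == 123456789ULL);
        assert(issue_size(&is) == 8);
        printf("time=%llu size=%llu skip=%d\n",
               (unsigned long long)issue_time(&is),
               (unsigned long long)issue_size(&is),
               !!(is.value & ISSUE_SKIP_LATENCY));
        return 0;
    }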

+ 1 - 5
block/blk-timeout.c

@@ -86,14 +86,11 @@ static void blk_rq_timed_out(struct request *req)
 	if (q->rq_timed_out_fn)
 		ret = q->rq_timed_out_fn(req);
 	switch (ret) {
-	case BLK_EH_HANDLED:
-		__blk_complete_request(req);
-		break;
 	case BLK_EH_RESET_TIMER:
 		blk_add_timer(req);
 		blk_clear_rq_complete(req);
 		break;
-	case BLK_EH_NOT_HANDLED:
+	case BLK_EH_DONE:
 		/*
 		 * LLD handles this for now but in the future
 		 * we can send a request msg to abort the command
@@ -214,7 +211,6 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.

+ 80 - 49
block/blk-wbt.c

@@ -29,6 +29,26 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
 
+static inline void wbt_clear_state(struct request *rq)
+{
+	rq->wbt_flags = 0;
+}
+
+static inline enum wbt_flags wbt_flags(struct request *rq)
+{
+	return rq->wbt_flags;
+}
+
+static inline bool wbt_is_tracked(struct request *rq)
+{
+	return rq->wbt_flags & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct request *rq)
+{
+	return rq->wbt_flags & WBT_READ;
+}
+
 enum {
 	/*
 	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
@@ -101,9 +121,15 @@ static bool wb_recent_wait(struct rq_wb *rwb)
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
 
-static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+					  enum wbt_flags wb_acct)
 {
-	return &rwb->rq_wait[is_kswapd];
+	if (wb_acct & WBT_KSWAPD)
+		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+	else if (wb_acct & WBT_DISCARD)
+		return &rwb->rq_wait[WBT_RWQ_DISCARD];
+
+	return &rwb->rq_wait[WBT_RWQ_BG];
 }
 
 static void rwb_wake_all(struct rq_wb *rwb)
@@ -126,7 +152,7 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
 	if (!(wb_acct & WBT_TRACKED))
 		return;
 
-	rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+	rqw = get_rq_wait(rwb, wb_acct);
 	inflight = atomic_dec_return(&rqw->inflight);
 
 	/*
@@ -139,10 +165,13 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
 	}
 
 	/*
-	 * If the device does write back caching, drop further down
-	 * before we wake people up.
+	 * For discards, our limit is always the background limit. For writes, if
+	 * the device does write back caching, drop further down before we
+	 * wake people up.
 	 */
-	if (rwb->wc && !wb_recent_wait(rwb))
+	if (wb_acct & WBT_DISCARD)
+		limit = rwb->wb_background;
+	else if (rwb->wc && !wb_recent_wait(rwb))
 		limit = 0;
 	else
 		limit = rwb->wb_normal;
@@ -165,24 +194,24 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb)
 		return;
 
-	if (!wbt_is_tracked(stat)) {
-		if (rwb->sync_cookie == stat) {
+	if (!wbt_is_tracked(rq)) {
+		if (rwb->sync_cookie == rq) {
 			rwb->sync_issue = 0;
 			rwb->sync_cookie = NULL;
 		}
 
-		if (wbt_is_read(stat))
+		if (wbt_is_read(rq))
 			wb_timestamp(rwb, &rwb->last_comp);
 	} else {
-		WARN_ON_ONCE(stat == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_stat_to_mask(stat));
+		WARN_ON_ONCE(rq == rwb->sync_cookie);
+		__wbt_done(rwb, wbt_flags(rq));
 	}
-	wbt_clear_state(stat);
+	wbt_clear_state(rq);
 }
 
 /*
@@ -479,6 +508,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 {
 	unsigned int limit;
 
+	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+		return rwb->wb_background;
+
 	/*
 	 * At this point we know it's a buffered write. If this is
 	 * kswapd trying to free memory, or REQ_SYNC is set, then
@@ -529,11 +561,12 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
  * Block if we will exceed our limit, or if we are currently waiting for
  * the timer to kick off queuing again.
  */
-static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
+static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
+		       unsigned long rw, spinlock_t *lock)
 	__releases(lock)
 	__acquires(lock)
 {
-	struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
+	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
 	DEFINE_WAIT(wait);
 
 	if (may_queue(rwb, rqw, &wait, rw))
@@ -559,21 +592,20 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
 
 static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
 {
-	const int op = bio_op(bio);
-
-	/*
-	 * If not a WRITE, do nothing
-	 */
-	if (op != REQ_OP_WRITE)
-		return false;
-
-	/*
-	 * Don't throttle WRITE_ODIRECT
-	 */
-	if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE))
+	switch (bio_op(bio)) {
+	case REQ_OP_WRITE:
+		/*
+		 * Don't throttle WRITE_ODIRECT
+		 */
+		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
+		    (REQ_SYNC | REQ_IDLE))
+			return false;
+		/* fallthrough */
+	case REQ_OP_DISCARD:
+		return true;
+	default:
 		return false;
-
-	return true;
+	}
 }
 
 /*
@@ -584,7 +616,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  */
 enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 {
-	unsigned int ret = 0;
+	enum wbt_flags ret = 0;
 
 	if (!rwb_enabled(rwb))
 		return 0;
@@ -598,41 +630,42 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 		return ret;
 	}
 
-	__wbt_wait(rwb, bio->bi_opf, lock);
+	if (current_is_kswapd())
+		ret |= WBT_KSWAPD;
+	if (bio_op(bio) == REQ_OP_DISCARD)
+		ret |= WBT_DISCARD;
+
+	__wbt_wait(rwb, ret, bio->bi_opf, lock);
 
 	if (!blk_stat_is_active(rwb->cb))
 		rwb_arm_timer(rwb);
 
-	if (current_is_kswapd())
-		ret |= WBT_KSWAPD;
-
 	return ret | WBT_TRACKED;
 }
 
-void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb_enabled(rwb))
 		return;
 
 	/*
-	 * Track sync issue, in case it takes a long time to complete. Allows
-	 * us to react quicker, if a sync IO takes a long time to complete.
-	 * Note that this is just a hint. 'stat' can go away when the
-	 * request completes, so it's important we never dereference it. We
-	 * only use the address to compare with, which is why we store the
-	 * sync_issue time locally.
+	 * Track sync issue, in case it takes a long time to complete. Allows us
+	 * to react quicker, if a sync IO takes a long time to complete. Note
+	 * that this is just a hint. The request can go away when it completes,
+	 * so it's important we never dereference it. We only use the address to
+	 * compare with, which is why we store the sync_issue time locally.
 	 */
-	if (wbt_is_read(stat) && !rwb->sync_issue) {
-		rwb->sync_cookie = stat;
-		rwb->sync_issue = blk_stat_time(stat);
+	if (wbt_is_read(rq) && !rwb->sync_issue) {
+		rwb->sync_cookie = rq;
+		rwb->sync_issue = rq->io_start_time_ns;
 	}
 }
 
-void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb_enabled(rwb))
 		return;
-	if (stat == rwb->sync_cookie) {
+	if (rq == rwb->sync_cookie) {
 		rwb->sync_issue = 0;
 		rwb->sync_cookie = NULL;
 	}
@@ -701,7 +734,7 @@ static int wbt_data_dir(const struct request *rq)
 
 	if (op == REQ_OP_READ)
 		return READ;
-	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+	else if (op_is_write(op))
 		return WRITE;
 
 	/* don't account */
@@ -713,8 +746,6 @@ int wbt_init(struct request_queue *q)
 	struct rq_wb *rwb;
 	int i;
 
-	BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
-
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
 		return -ENOMEM;
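
The blk-wbt changes store the throttling flags directly on struct request and add a WBT_DISCARD class with its own inflight queue, so get_rq_wait() now picks a queue from the flags instead of from a single is_kswapd bool. A minimal userspace sketch of that selection, with the flag values taken from the blk-wbt.h hunk further down and the queue indices assumed to mirror WBT_RWQ_*:

    #include <stdio.h>

    /* Flag bits as added in the blk-wbt.h hunk below. */
    enum wbt_flags {
        WBT_TRACKED = 1,    /* write, tracked for throttling */
        WBT_READ    = 2,
        WBT_KSWAPD  = 4,    /* write issued by kswapd */
        WBT_DISCARD = 8,    /* discard */
    };

    enum { RWQ_BG, RWQ_KSWAPD, RWQ_DISCARD, NUM_RWQ };

    /* Mirrors get_rq_wait(): kswapd writeback and discards each get a dedicated
     * inflight queue, everything else shares the background one. */
    static int rq_wait_index(unsigned int wb_acct)
    {
        if (wb_acct & WBT_KSWAPD)
            return RWQ_KSWAPD;
        if (wb_acct & WBT_DISCARD)
            return RWQ_DISCARD;
        return RWQ_BG;
    }

    int main(void)
    {
        printf("tracked write    -> queue %d\n", rq_wait_index(WBT_TRACKED));
        printf("kswapd writeback -> queue %d\n", rq_wait_index(WBT_TRACKED | WBT_KSWAPD));
        printf("discard          -> queue %d\n", rq_wait_index(WBT_TRACKED | WBT_DISCARD));
        return 0;
    }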

+ 21 - 34
block/blk-wbt.h

@@ -14,12 +14,16 @@ enum wbt_flags {
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
 	WBT_READ		= 2,	/* read */
 	WBT_KSWAPD		= 4,	/* write, from kswapd */
+	WBT_DISCARD		= 8,	/* discard */
 
-	WBT_NR_BITS		= 3,	/* number of bits */
+	WBT_NR_BITS		= 4,	/* number of bits */
 };
 
 enum {
-	WBT_NUM_RWQ		= 2,
+	WBT_RWQ_BG		= 0,
+	WBT_RWQ_KSWAPD,
+	WBT_RWQ_DISCARD,
+	WBT_NUM_RWQ,
 };
 
 /*
@@ -31,31 +35,6 @@ enum {
 	WBT_STATE_ON_MANUAL	= 2,
 };
 
-static inline void wbt_clear_state(struct blk_issue_stat *stat)
-{
-	stat->stat &= ~BLK_STAT_RES_MASK;
-}
-
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
-{
-	return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
-}
-
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
-{
-	stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
-}
-
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
-}
-
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
-}
-
 struct rq_wait {
 	wait_queue_head_t wait;
 	atomic_t inflight;
@@ -84,7 +63,7 @@ struct rq_wb {
 
 	struct blk_stat_callback *cb;
 
-	s64 sync_issue;
+	u64 sync_issue;
 	void *sync_cookie;
 
 	unsigned int wc;
@@ -109,14 +88,19 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 
 #ifdef CONFIG_BLK_WBT
 
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+	rq->wbt_flags |= flags;
+}
+
 void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+void wbt_done(struct rq_wb *, struct request *);
 enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
 void wbt_exit(struct request_queue *);
 void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
-void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_requeue(struct rq_wb *, struct request *);
+void wbt_issue(struct rq_wb *, struct request *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);
 
@@ -127,10 +111,13 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 
 #else
 
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+}
 static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
 {
 }
-static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
@@ -148,10 +135,10 @@ static inline void wbt_exit(struct request_queue *q)
 static inline void wbt_update_limits(struct rq_wb *rwb)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline void wbt_disable_default(struct request_queue *q)

+ 6 - 2
block/blk-zoned.c

@@ -328,7 +328,11 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!rep.nr_zones)
 		return -EINVAL;
 
-	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
+		return -ERANGE;
+
+	zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone),
+			GFP_KERNEL | __GFP_ZERO);
 	if (!zones)
 		return -ENOMEM;
 
@@ -350,7 +354,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	}
 
  out:
-	kfree(zones);
+	kvfree(zones);
 
 	return ret;
 }
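
Because kvmalloc() does not give the implicit multiplication-overflow check that kcalloc() provided, the blk-zoned.c hunk has to add an explicit bound test before computing nr_zones * sizeof(struct blk_zone). The same guard-then-allocate pattern in plain userspace C (malloc/memset standing in for kvmalloc with __GFP_ZERO):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct zone { uint64_t start, len; };   /* stand-in for struct blk_zone */

    /* Refuse counts whose byte size would overflow before allocating a zeroed
     * array -- the check that kcalloc() used to do for us. */
    static struct zone *alloc_zones(size_t nr)
    {
        struct zone *zones;
        size_t bytes;

        if (nr > SIZE_MAX / sizeof(struct zone)) {
            errno = ERANGE;
            return NULL;
        }
        bytes = nr * sizeof(struct zone);
        zones = malloc(bytes);      /* kvmalloc(..., __GFP_ZERO) in the kernel */
        if (zones)
            memset(zones, 0, bytes);
        return zones;
    }

    int main(void)
    {
        struct zone *zones = alloc_zones(128);

        if (!zones)
            return 1;
        /* ... fill in and report zones here ... */
        free(zones);                /* kvfree() in the kernel */
        return 0;
    }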

+ 4 - 1
block/blk.h

@@ -186,7 +186,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q);
 
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
-void blk_account_io_done(struct request *req);
+void blk_account_io_done(struct request *req, u64 now);
 
 /*
  * EH timer and IO completion will both attempt to 'grab' the request, make
@@ -231,6 +231,9 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
 		e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }
 
+int elevator_init(struct request_queue *);
+int elevator_init_mq(struct request_queue *q);
+void elevator_exit(struct request_queue *, struct elevator_queue *);
 int elv_register_queue(struct request_queue *q);
 void elv_unregister_queue(struct request_queue *q);
 

+ 26 - 26
block/bounce.c

@@ -28,28 +28,29 @@
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
-static struct bio_set *bounce_bio_set, *bounce_bio_split;
-static mempool_t *page_pool, *isa_page_pool;
+static struct bio_set bounce_bio_set, bounce_bio_split;
+static mempool_t page_pool, isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM)
 static __init int init_emergency_pool(void)
 {
+	int ret;
 #if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
 	if (max_pfn <= max_low_pfn)
 		return 0;
 #endif
 
-	page_pool = mempool_create_page_pool(POOL_SIZE, 0);
-	BUG_ON(!page_pool);
+	ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
+	BUG_ON(ret);
 	pr_info("pool size: %d pages\n", POOL_SIZE);
 
-	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	BUG_ON(!bounce_bio_set);
-	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+	ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	BUG_ON(ret);
+	if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
 		BUG_ON(1);
 
-	bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	BUG_ON(!bounce_bio_split);
+	ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
+	BUG_ON(ret);
 
 	return 0;
 }
@@ -63,14 +64,11 @@ __initcall(init_emergency_pool);
  */
 static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
 {
-	unsigned long flags;
 	unsigned char *vto;
 
-	local_irq_save(flags);
 	vto = kmap_atomic(to->bv_page);
 	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
 	kunmap_atomic(vto);
-	local_irq_restore(flags);
 }
 
 #else /* CONFIG_HIGHMEM */
@@ -94,12 +92,14 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
  */
 int init_emergency_isa_pool(void)
 {
-	if (isa_page_pool)
+	int ret;
+
+	if (mempool_initialized(&isa_page_pool))
 		return 0;
 
-	isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
-				       mempool_free_pages, (void *) 0);
-	BUG_ON(!isa_page_pool);
+	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
+			   mempool_free_pages, (void *) 0);
+	BUG_ON(ret);
 
 	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
 	return 0;
@@ -166,13 +166,13 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 
 static void bounce_end_io_write(struct bio *bio)
 {
-	bounce_end_io(bio, page_pool);
+	bounce_end_io(bio, &page_pool);
 }
 
 static void bounce_end_io_write_isa(struct bio *bio)
 {
 
-	bounce_end_io(bio, isa_page_pool);
+	bounce_end_io(bio, &isa_page_pool);
 }
 
 static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
@@ -187,12 +187,12 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 
 static void bounce_end_io_read(struct bio *bio)
 {
-	__bounce_end_io_read(bio, page_pool);
+	__bounce_end_io_read(bio, &page_pool);
 }
 
 static void bounce_end_io_read_isa(struct bio *bio)
 {
-	__bounce_end_io_read(bio, isa_page_pool);
+	__bounce_end_io_read(bio, &isa_page_pool);
 }
 
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
@@ -217,13 +217,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		return;
 
 	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
-		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
 	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
-			bounce_bio_set);
+			&bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
@@ -250,7 +250,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 
 	bio->bi_flags |= (1 << BIO_BOUNCED);
 
-	if (pool == page_pool) {
+	if (pool == &page_pool) {
 		bio->bi_end_io = bounce_end_io_write;
 		if (rw == READ)
 			bio->bi_end_io = bounce_end_io_read;
@@ -282,10 +282,10 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	if (!(q->bounce_gfp & GFP_DMA)) {
 		if (q->limits.bounce_pfn >= blk_max_pfn)
 			return;
-		pool = page_pool;
+		pool = &page_pool;
 	} else {
-		BUG_ON(!isa_page_pool);
-		pool = isa_page_pool;
+		BUG_ON(!mempool_initialized(&isa_page_pool));
+		pool = &isa_page_pool;
 	}
 
 	/*
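
bounce.c is one of the "convert users of bio_sets to embedded structs" conversions from this pull: the bio_sets and mempools become embedded objects initialised with bioset_init()/mempool_init() instead of pointers returned by *_create(), which removes an allocation and ties each pool's lifetime to its owner. A rough userspace analogue of that shape (the pool type and helpers here are invented for illustration):

    #include <stdbool.h>
    #include <stdlib.h>

    /* Toy pool that lives inside its user, mirroring the mempool_init()/
     * bioset_init() style: the caller provides the storage, init just fills it. */
    struct pool {
        void **slots;
        int nr;
    };

    static int pool_init(struct pool *p, int nr)
    {
        p->slots = calloc(nr, sizeof(*p->slots));
        if (!p->slots)
            return -1;
        p->nr = nr;
        return 0;
    }

    static bool pool_initialized(const struct pool *p)  /* cf. mempool_initialized() */
    {
        return p->slots != NULL;
    }

    static void pool_exit(struct pool *p)
    {
        free(p->slots);
        p->slots = NULL;
        p->nr = 0;
    }

    /* Embedded object, not a pointer handed back by a *_create() call. */
    static struct pool page_pool;

    int main(void)
    {
        if (pool_init(&page_pool, 64))
            return 1;
        if (pool_initialized(&page_pool))
            pool_exit(&page_pool);
        return 0;
    }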

+ 2 - 4
block/bsg-lib.c

@@ -303,11 +303,9 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req)
  * @name: device to give bsg device
  * @job_fn: bsg job handler
  * @dd_job_size: size of LLD data needed for each job
- * @release: @dev release function
  */
 struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
-		bsg_job_fn *job_fn, int dd_job_size,
-		void (*release)(struct device *))
+		bsg_job_fn *job_fn, int dd_job_size)
 {
 	struct request_queue *q;
 	int ret;
@@ -331,7 +329,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
+	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
 	if (ret) {
 		printk(KERN_ERR "%s: bsg interface failed to "
 		       "initialize - register queue\n", dev->kobj.name);

+ 7 - 37
block/bsg.c

@@ -226,8 +226,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
 		return ERR_PTR(ret);
 
 	rq = blk_get_request(q, hdr->dout_xfer_len ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			GFP_KERNEL);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return rq;
 
@@ -249,7 +248,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
 			goto out;
 		}
 
-		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 		if (IS_ERR(next_rq)) {
 			ret = PTR_ERR(next_rq);
 			goto out;
@@ -650,18 +649,6 @@ static struct bsg_device *bsg_alloc_device(void)
 	return bd;
 }
 
-static void bsg_kref_release_function(struct kref *kref)
-{
-	struct bsg_class_device *bcd =
-		container_of(kref, struct bsg_class_device, ref);
-	struct device *parent = bcd->parent;
-
-	if (bcd->release)
-		bcd->release(bcd->parent);
-
-	put_device(parent);
-}
-
 static int bsg_put_device(struct bsg_device *bd)
 {
 	int ret = 0, do_free;
@@ -694,7 +681,6 @@ static int bsg_put_device(struct bsg_device *bd)
 
 	kfree(bd);
 out:
-	kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
 	if (do_free)
 		blk_put_queue(q);
 	return ret;
@@ -760,8 +746,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
 	 */
 	mutex_lock(&bsg_mutex);
 	bcd = idr_find(&bsg_minor_idr, iminor(inode));
-	if (bcd)
-		kref_get(&bcd->ref);
 	mutex_unlock(&bsg_mutex);
 
 	if (!bcd)
@@ -772,8 +756,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
 		return bd;
 
 	bd = bsg_add_device(inode, bcd->queue, file);
-	if (IS_ERR(bd))
-		kref_put(&bcd->ref, bsg_kref_release_function);
 
 	return bd;
 }
@@ -913,25 +895,17 @@ void bsg_unregister_queue(struct request_queue *q)
 		sysfs_remove_link(&q->kobj, "bsg");
 	device_unregister(bcd->class_dev);
 	bcd->class_dev = NULL;
-	kref_put(&bcd->ref, bsg_kref_release_function);
 	mutex_unlock(&bsg_mutex);
 }
 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
 
 int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops,
-		void (*release)(struct device *))
+		const char *name, const struct bsg_ops *ops)
 {
 	struct bsg_class_device *bcd;
 	dev_t dev;
 	int ret;
 	struct device *class_dev = NULL;
-	const char *devname;
-
-	if (name)
-		devname = name;
-	else
-		devname = dev_name(parent);
 
 	/*
 	 * we need a proper transport to send commands, not a stacked device
@@ -955,15 +929,12 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 
 	bcd->minor = ret;
 	bcd->queue = q;
-	bcd->parent = get_device(parent);
-	bcd->release = release;
 	bcd->ops = ops;
-	kref_init(&bcd->ref);
 	dev = MKDEV(bsg_major, bcd->minor);
-	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
+	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
 	if (IS_ERR(class_dev)) {
 		ret = PTR_ERR(class_dev);
-		goto put_dev;
+		goto idr_remove;
 	}
 	bcd->class_dev = class_dev;
 
@@ -978,8 +949,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 
 unregister_class_dev:
 	device_unregister(class_dev);
-put_dev:
-	put_device(parent);
+idr_remove:
 	idr_remove(&bsg_minor_idr, bcd->minor);
 unlock:
 	mutex_unlock(&bsg_mutex);
@@ -993,7 +963,7 @@ int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
 		return -EINVAL;
 	}
 
-	return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
+	return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
 }
 EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
 

+ 30 - 36
block/cfq-iosched.c

@@ -210,9 +210,9 @@ struct cfqg_stats {
 	/* total time with empty current active q with other requests queued */
 	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
-	uint64_t			start_group_wait_time;
-	uint64_t			start_idle_time;
-	uint64_t			start_empty_time;
+	u64				start_group_wait_time;
+	u64				start_idle_time;
+	u64				start_empty_time;
 	uint16_t			flags;
 #endif	/* CONFIG_DEBUG_BLK_CGROUP */
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
@@ -491,13 +491,13 @@ CFQG_FLAG_FNS(empty)
 /* This should be called with the queue_lock held. */
 static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!cfqg_stats_waiting(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
+	now = ktime_get_ns();
+	if (now > stats->start_group_wait_time)
 		blkg_stat_add(&stats->group_wait_time,
 			      now - stats->start_group_wait_time);
 	cfqg_stats_clear_waiting(stats);
@@ -513,20 +513,20 @@ static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
 		return;
 	if (cfqg == curr_cfqg)
 		return;
-	stats->start_group_wait_time = sched_clock();
+	stats->start_group_wait_time = ktime_get_ns();
 	cfqg_stats_mark_waiting(stats);
 }
 
 /* This should be called with the queue_lock held. */
 static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!cfqg_stats_empty(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
+	now = ktime_get_ns();
+	if (now > stats->start_empty_time)
 		blkg_stat_add(&stats->empty_time,
 			      now - stats->start_empty_time);
 	cfqg_stats_clear_empty(stats);
@@ -552,7 +552,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
 	if (cfqg_stats_empty(stats))
 		return;
 
-	stats->start_empty_time = sched_clock();
+	stats->start_empty_time = ktime_get_ns();
 	cfqg_stats_mark_empty(stats);
 }
 
@@ -561,9 +561,9 @@ static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
 	struct cfqg_stats *stats = &cfqg->stats;
 
 	if (cfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
+		u64 now = ktime_get_ns();
 
-		if (time_after64(now, stats->start_idle_time))
+		if (now > stats->start_idle_time)
 			blkg_stat_add(&stats->idle_time,
 				      now - stats->start_idle_time);
 		cfqg_stats_clear_idling(stats);
@@ -576,7 +576,7 @@ static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
 
 	BUG_ON(cfqg_stats_idling(stats));
 
-	stats->start_idle_time = sched_clock();
+	stats->start_idle_time = ktime_get_ns();
 	cfqg_stats_mark_idling(stats);
 }
 
@@ -701,17 +701,19 @@ static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
 }
 
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op)
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op)
 {
 	struct cfqg_stats *stats = &cfqg->stats;
-	unsigned long long now = sched_clock();
+	u64 now = ktime_get_ns();
 
-	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
-	if (time_after64(io_start_time, start_time))
+	if (now > io_start_time_ns)
+		blkg_rwstat_add(&stats->service_time, op,
+				now - io_start_time_ns);
+	if (io_start_time_ns > start_time_ns)
 		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
+				io_start_time_ns - start_time_ns);
 }
 
 /* @stats = 0 */
@@ -797,8 +799,9 @@ static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
 static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
 			unsigned int op) { }
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op) { }
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op) { }
 
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -4225,8 +4228,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
 	(RQ_CFQG(rq))->dispatched--;
-	cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
-				     rq_io_start_time_ns(rq), rq->cmd_flags);
+	cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
+				     rq->io_start_time_ns, rq->cmd_flags);
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -4242,16 +4245,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 					cfqq_type(cfqq));
 
 		st->ttime.last_end_request = now;
-		/*
-		 * We have to do this check in jiffies since start_time is in
-		 * jiffies and it is not trivial to convert to ns. If
-		 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
-		 * will become problematic but so far we are fine (the default
-		 * is 128 ms).
-		 */
-		if (!time_after(rq->start_time +
-				  nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
-				jiffies))
+		if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
 			cfqd->last_delayed_sync = now;
 	}
 
@@ -4792,7 +4786,7 @@ USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, U
 #undef USEC_STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
+	__ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)
 
 static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(quantum),
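
The cfq-iosched.c changes belong to the series-wide switch from sched_clock()/jiffies bookkeeping to u64 nanosecond timestamps from ktime_get_ns(), after which "charge the elapsed time only if now is past the start stamp" is a plain unsigned comparison. A small userspace sketch of that pattern, using CLOCK_MONOTONIC as a stand-in clock:

    #define _POSIX_C_SOURCE 199309L
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* Userspace stand-in for ktime_get_ns(): monotonic time in nanoseconds. */
    static uint64_t now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

    /* Accumulate elapsed time the way cfqg_stats_update_completion() now does:
     * only add the delta when "now" is actually past the start stamp. */
    static void account(uint64_t *total_ns, uint64_t start_ns)
    {
        uint64_t now = now_ns();

        if (now > start_ns)
            *total_ns += now - start_ns;
    }

    int main(void)
    {
        uint64_t total = 0;
        uint64_t start = now_ns();

        account(&total, start);
        printf("accumulated %llu ns\n", (unsigned long long)total);
        return 0;
    }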

+ 1 - 2
block/deadline-iosched.c

@@ -512,8 +512,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
 #undef STORE_FUNCTION
 
 #define DD_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
-				      deadline_##name##_store)
+	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
 
 static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(read_expire),

+ 52 - 49
block/elevator.c

@@ -199,76 +199,46 @@ static void elevator_release(struct kobject *kobj)
 	kfree(e);
 }
 
-int elevator_init(struct request_queue *q, char *name)
+/*
+ * Use the default elevator specified by config boot param for non-mq devices,
+ * or by config option.  Don't try to load modules as we could be running off
+ * async and request_module() isn't allowed from async.
+ */
+int elevator_init(struct request_queue *q)
 {
 	struct elevator_type *e = NULL;
-	int err;
+	int err = 0;
 
 	/*
 	 * q->sysfs_lock must be held to provide mutual exclusion between
 	 * elevator_switch() and here.
 	 */
-	lockdep_assert_held(&q->sysfs_lock);
-
+	mutex_lock(&q->sysfs_lock);
 	if (unlikely(q->elevator))
-		return 0;
-
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
-	q->end_sector = 0;
-	q->boundary_rq = NULL;
-
-	if (name) {
-		e = elevator_get(q, name, true);
-		if (!e)
-			return -EINVAL;
-	}
+		goto out_unlock;
 
-	/*
-	 * Use the default elevator specified by config boot param for
-	 * non-mq devices, or by config option. Don't try to load modules
-	 * as we could be running off async and request_module() isn't
-	 * allowed from async.
-	 */
-	if (!e && !q->mq_ops && *chosen_elevator) {
+	if (*chosen_elevator) {
 		e = elevator_get(q, chosen_elevator, false);
 		if (!e)
 			printk(KERN_ERR "I/O scheduler %s not found\n",
 							chosen_elevator);
 	}
 
+	if (!e)
+		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
 	if (!e) {
-		/*
-		 * For blk-mq devices, we default to using mq-deadline,
-		 * if available, for single queue devices. If deadline
-		 * isn't available OR we have multiple queues, default
-		 * to "none".
-		 */
-		if (q->mq_ops) {
-			if (q->nr_hw_queues == 1)
-				e = elevator_get(q, "mq-deadline", false);
-			if (!e)
-				return 0;
-		} else
-			e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
-
-		if (!e) {
-			printk(KERN_ERR
-				"Default I/O scheduler not found. " \
-				"Using noop.\n");
-			e = elevator_get(q, "noop", false);
-		}
+		printk(KERN_ERR
+			"Default I/O scheduler not found. Using noop.\n");
+		e = elevator_get(q, "noop", false);
 	}
 
-	if (e->uses_mq)
-		err = blk_mq_init_sched(q, e);
-	else
-		err = e->ops.sq.elevator_init_fn(q, e);
+	err = e->ops.sq.elevator_init_fn(q, e);
 	if (err)
 		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
 	return err;
 }
-EXPORT_SYMBOL(elevator_init);
 
 void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 {
@@ -281,7 +251,6 @@ void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 
 	kobject_put(&e->kobj);
 }
-EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
@@ -1004,6 +973,40 @@ out:
 	return ret;
 }
 
+/*
+ * For blk-mq devices, we default to using mq-deadline, if available, for single
+ * queue devices.  If deadline isn't available OR we have multiple queues,
+ * default to "none".
+ */
+int elevator_init_mq(struct request_queue *q)
+{
+	struct elevator_type *e;
+	int err = 0;
+
+	if (q->nr_hw_queues != 1)
+		return 0;
+
+	/*
+	 * q->sysfs_lock must be held to provide mutual exclusion between
+	 * elevator_switch() and here.
+	 */
+	mutex_lock(&q->sysfs_lock);
+	if (unlikely(q->elevator))
+		goto out_unlock;
+
+	e = elevator_get(q, "mq-deadline", false);
+	if (!e)
+		goto out_unlock;
+
+	err = blk_mq_init_sched(q, e);
+	if (err)
+		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
+	return err;
+}
+
+
 /*
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
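
elevator.c splits initialisation into elevator_init() for the legacy request path and the new elevator_init_mq() for blk-mq, which only picks mq-deadline when the device has a single hardware queue and otherwise leaves the queue at "none". The tiny sketch below restates that decision as a standalone function; the names and the cfq fallback string are illustrative only:

    #include <stdio.h>

    /* Restates the default-scheduler policy after the elevator_init() /
     * elevator_init_mq() split. Strings are illustrative, not a kernel API. */
    static const char *pick_default_elevator(int is_mq, int nr_hw_queues,
                                             const char *chosen)
    {
        if (is_mq)  /* elevator_init_mq(): mq-deadline only for single-queue devices */
            return nr_hw_queues == 1 ? "mq-deadline" : "none";

        /* elevator_init(): boot-time "elevator=" choice first, then the
         * configured default, with noop as the last resort. */
        if (chosen && *chosen)
            return chosen;
        return "cfq";   /* stands in for CONFIG_DEFAULT_IOSCHED */
    }

    int main(void)
    {
        printf("%s\n", pick_default_elevator(1, 1, ""));            /* mq-deadline */
        printf("%s\n", pick_default_elevator(1, 4, ""));            /* none */
        printf("%s\n", pick_default_elevator(0, 1, "deadline"));    /* deadline */
        return 0;
    }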

+ 17 - 20
block/genhd.c

@@ -1139,28 +1139,25 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }
 
-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
-static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
-static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
-static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
-		   NULL);
-static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
-		disk_badblocks_store);
+static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
+static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
+static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
+static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
+static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
 #endif
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 static struct device_attribute dev_attr_fail_timeout =
-	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
-		part_timeout_store);
+	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
 #endif
 
 static struct attribute *disk_attrs[] = {
@@ -1924,9 +1921,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
 	return count;
 }
 
-static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
 			 disk_events_poll_msecs_show,
 			 disk_events_poll_msecs_store);
 

+ 164 - 35
block/kyber-iosched.c

@@ -72,6 +72,19 @@ static const unsigned int kyber_batch_size[] = {
 	[KYBER_OTHER] = 8,
 };
 
+/*
+ * There is a one-to-one mapping between ctx & hctx and between kcq & khd;
+ * we use request->mq_ctx->index_hw to index the kcq in khd.
+ */
+struct kyber_ctx_queue {
+	/*
+	 * Used to ensure that operations on rq_list and kcq_map are atomic.
+	 * Also protects the rqs on rq_list during merges.
+	 */
+	spinlock_t lock;
+	struct list_head rq_list[KYBER_NUM_DOMAINS];
+} ____cacheline_aligned_in_smp;
+
 struct kyber_queue_data {
 	struct request_queue *q;
 
@@ -99,6 +112,8 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
+	struct kyber_ctx_queue *kcqs;
+	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
@@ -107,10 +122,8 @@ struct kyber_hctx_data {
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key);
 
-static int rq_sched_domain(const struct request *rq)
+static unsigned int kyber_sched_domain(unsigned int op)
 {
-	unsigned int op = rq->cmd_flags;
-
 	if ((op & REQ_OP_MASK) == REQ_OP_READ)
 		return KYBER_READ;
 	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
@@ -284,6 +297,11 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
 	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
 }
 
+static int kyber_bucket_fn(const struct request *rq)
+{
+	return kyber_sched_domain(rq->cmd_flags);
+}
+
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
 	struct kyber_queue_data *kqd;
@@ -297,7 +315,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 		goto err;
 	kqd->q = q;
 
-	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
 					  KYBER_NUM_DOMAINS, kqd);
 	if (!kqd->cb)
 		goto err_kqd;
@@ -376,8 +394,18 @@ static void kyber_exit_sched(struct elevator_queue *e)
 	kfree(kqd);
 }
 
+static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
+{
+	unsigned int i;
+
+	spin_lock_init(&kcq->lock);
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		INIT_LIST_HEAD(&kcq->rq_list[i]);
+}
+
 static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
 	struct kyber_hctx_data *khd;
 	int i;
 
@@ -385,6 +413,24 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	if (!khd)
 		return -ENOMEM;
 
+	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
+				       sizeof(struct kyber_ctx_queue),
+				       GFP_KERNEL, hctx->numa_node);
+	if (!khd->kcqs)
+		goto err_khd;
+
+	for (i = 0; i < hctx->nr_ctx; i++)
+		kyber_ctx_queue_init(&khd->kcqs[i]);
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
+				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+			while (--i >= 0)
+				sbitmap_free(&khd->kcq_map[i]);
+			goto err_kcqs;
+		}
+	}
+
 	spin_lock_init(&khd->lock);
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
@@ -400,12 +446,26 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	khd->batching = 0;
 
 	hctx->sched_data = khd;
+	sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
+					kqd->async_depth);
 
 	return 0;
+
+err_kcqs:
+	kfree(khd->kcqs);
+err_khd:
+	kfree(khd);
+	return -ENOMEM;
 }
 
 static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	int i;
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		sbitmap_free(&khd->kcq_map[i]);
+	kfree(khd->kcqs);
 	kfree(hctx->sched_data);
 }
 
@@ -427,7 +487,7 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 
 	nr = rq_get_domain_token(rq);
 	if (nr != -1) {
-		sched_domain = rq_sched_domain(rq);
+		sched_domain = kyber_sched_domain(rq->cmd_flags);
 		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
 				    rq->mq_ctx->cpu);
 	}
@@ -446,11 +506,51 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 	}
 }
 
+static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
+	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
+	struct list_head *rq_list = &kcq->rq_list[sched_domain];
+	bool merged;
+
+	spin_lock(&kcq->lock);
+	merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio);
+	spin_unlock(&kcq->lock);
+	blk_mq_put_ctx(ctx);
+
+	return merged;
+}
+
 static void kyber_prepare_request(struct request *rq, struct bio *bio)
 {
 	rq_set_domain_token(rq, -1);
 }
 
+static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
+				  struct list_head *rq_list, bool at_head)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct request *rq, *next;
+
+	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
+		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
+		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+		struct list_head *head = &kcq->rq_list[sched_domain];
+
+		spin_lock(&kcq->lock);
+		if (at_head)
+			list_move(&rq->queuelist, head);
+		else
+			list_move_tail(&rq->queuelist, head);
+		sbitmap_set_bit(&khd->kcq_map[sched_domain],
+				rq->mq_ctx->index_hw);
+		blk_mq_sched_request_inserted(rq);
+		spin_unlock(&kcq->lock);
+	}
+}
+
 static void kyber_finish_request(struct request *rq)
 {
 	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
@@ -469,7 +569,7 @@ static void kyber_completed_request(struct request *rq)
 	 * Check if this request met our latency goal. If not, quickly gather
 	 * some statistics and start throttling.
 	 */
-	sched_domain = rq_sched_domain(rq);
+	sched_domain = kyber_sched_domain(rq->cmd_flags);
 	switch (sched_domain) {
 	case KYBER_READ:
 		target = kqd->read_lat_nsec;
@@ -485,29 +585,48 @@ static void kyber_completed_request(struct request *rq)
 	if (blk_stat_is_active(kqd->cb))
 		return;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
+	now = ktime_get_ns();
+	if (now < rq->io_start_time_ns)
 		return;
 
-	latency = now - blk_stat_time(&rq->issue_stat);
+	latency = now - rq->io_start_time_ns;
 
 	if (latency > target)
 		blk_stat_activate_msecs(kqd->cb, 10);
 }
 
-static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
-				  struct blk_mq_hw_ctx *hctx)
+struct flush_kcq_data {
+	struct kyber_hctx_data *khd;
+	unsigned int sched_domain;
+	struct list_head *list;
+};
+
+static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
 {
-	LIST_HEAD(rq_list);
-	struct request *rq, *next;
+	struct flush_kcq_data *flush_data = data;
+	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];
 
-	blk_mq_flush_busy_ctxs(hctx, &rq_list);
-	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-		unsigned int sched_domain;
+	spin_lock(&kcq->lock);
+	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
+			      flush_data->list);
+	sbitmap_clear_bit(sb, bitnr);
+	spin_unlock(&kcq->lock);
 
-		sched_domain = rq_sched_domain(rq);
-		list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
-	}
+	return true;
+}
+
+static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
+				  unsigned int sched_domain,
+				  struct list_head *list)
+{
+	struct flush_kcq_data data = {
+		.khd = khd,
+		.sched_domain = sched_domain,
+		.list = list,
+	};
+
+	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
+			     flush_busy_kcq, &data);
 }
 
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
@@ -570,26 +689,23 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 static struct request *
 kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
 			  struct kyber_hctx_data *khd,
-			  struct blk_mq_hw_ctx *hctx,
-			  bool *flushed)
+			  struct blk_mq_hw_ctx *hctx)
 {
 	struct list_head *rqs;
 	struct request *rq;
 	int nr;
 
 	rqs = &khd->rqs[khd->cur_domain];
-	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 
 	/*
-	 * If there wasn't already a pending request and we haven't flushed the
-	 * software queues yet, flush the software queues and check again.
+	 * If we already have a flushed request, then we just need to get a
+	 * token for it. Otherwise, if there are pending requests in the kcqs,
+	 * flush the kcqs, but only if we can get a token. If not, we should
+	 * leave the requests in the kcqs so that they can be merged. Note that
+	 * khd->lock serializes the flushes, so if we observed any bit set in
+	 * the kcq_map, we will always get a request.
 	 */
-	if (!rq && !*flushed) {
-		kyber_flush_busy_ctxs(khd, hctx);
-		*flushed = true;
-		rq = list_first_entry_or_null(rqs, struct request, queuelist);
-	}
-
+	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 	if (rq) {
 		nr = kyber_get_domain_token(kqd, khd, hctx);
 		if (nr >= 0) {
@@ -598,6 +714,16 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
 			list_del_init(&rq->queuelist);
 			return rq;
 		}
+	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
+		nr = kyber_get_domain_token(kqd, khd, hctx);
+		if (nr >= 0) {
+			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
+			rq = list_first_entry(rqs, struct request, queuelist);
+			khd->batching++;
+			rq_set_domain_token(rq, nr);
+			list_del_init(&rq->queuelist);
+			return rq;
+		}
 	}
 
 	/* There were either no pending requests or no tokens. */
@@ -608,7 +734,6 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	bool flushed = false;
 	struct request *rq;
 	int i;
 
@@ -619,7 +744,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	 * from the batch.
 	 */
 	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -640,7 +765,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		else
 			khd->cur_domain++;
 
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -657,10 +782,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
 	int i;
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
-		if (!list_empty_careful(&khd->rqs[i]))
+		if (!list_empty_careful(&khd->rqs[i]) ||
+		    sbitmap_any_bit_set(&khd->kcq_map[i]))
 			return true;
 	}
-	return sbitmap_any_bit_set(&hctx->ctx_map);
+
+	return false;
 }
 
 #define KYBER_LAT_SHOW_STORE(op)					\
@@ -831,7 +958,9 @@ static struct elevator_type kyber_sched = {
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
 		.limit_depth = kyber_limit_depth,
+		.bio_merge = kyber_bio_merge,
 		.prepare_request = kyber_prepare_request,
+		.insert_requests = kyber_insert_requests,
 		.finish_request = kyber_finish_request,
 		.requeue_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
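
The Kyber rework keeps per-software-context request lists (khd->kcqs) inside the scheduler and marks the non-empty ones in a per-domain sbitmap (kcq_map), so bio merging can look at exactly one context's list and dispatch only walks contexts that actually have work. The self-contained sketch below mimics the "set a bit on insert, visit only set bits on flush" idea with a plain 64-bit word in place of an sbitmap; the names are made up for the example.

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CTX 8

    /* One pending counter per software context; a real implementation would keep
     * a request list here, protected by a per-context lock. */
    static unsigned int pending[NR_CTX];
    static uint64_t busy_map;           /* bit i set => pending[i] != 0 */

    static void insert(int ctx)
    {
        pending[ctx]++;
        busy_map |= UINT64_C(1) << ctx; /* like sbitmap_set_bit(&kcq_map, ctx) */
    }

    /* Like kyber_flush_busy_kcqs(): visit only contexts whose bit is set,
     * pull their work out and clear the bit. */
    static unsigned int flush_busy(void)
    {
        unsigned int total = 0;

        while (busy_map) {
            int ctx = __builtin_ctzll(busy_map);

            total += pending[ctx];
            pending[ctx] = 0;
            busy_map &= ~(UINT64_C(1) << ctx);  /* like sbitmap_clear_bit() */
        }
        return total;
    }

    int main(void)
    {
        insert(1);
        insert(1);
        insert(5);
        printf("flushed %u requests\n", flush_busy());
        return 0;
    }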

+ 1 - 2
block/mq-deadline.c

@@ -630,8 +630,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
 #undef STORE_FUNCTION
 
 #define DD_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
-				      deadline_##name##_store)
+	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
 
 static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(read_expire),

+ 12 - 14
block/partition-generic.c

@@ -179,18 +179,17 @@ ssize_t part_fail_store(struct device *dev,
 }
 #endif
 
-static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
-static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
-		   NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
+static DEVICE_ATTR(start, 0444, part_start_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
 #endif
 
 static struct attribute *part_attrs[] = {
@@ -291,8 +290,7 @@ static ssize_t whole_disk_show(struct device *dev,
 {
 	return 0;
 }
-static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
-		   whole_disk_show, NULL);
+static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
 
 /*
  * Must be called either with bd_mutex held, before a disk can be opened or
@@ -518,7 +516,7 @@ rescan:
 
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
-	check_disk_size_change(disk, bdev);
+	check_disk_size_change(disk, bdev, true);
 	bdev->bd_invalidated = 0;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
@@ -643,7 +641,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 		return res;
 
 	set_capacity(disk, 0);
-	check_disk_size_change(disk, bdev);
+	check_disk_size_change(disk, bdev, false);
 	bdev->bd_invalidated = 0;
 	/* tell userspace that the media / partition table may have changed */
 	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

+ 4 - 6
block/scsi_ioctl.c

@@ -321,8 +321,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		at_head = 1;
 
 	ret = -ENOMEM;
-	rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			GFP_KERNEL);
+	rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = scsi_req(rq);
@@ -449,8 +448,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	}
 
-	rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			__GFP_RECLAIM);
+	rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto error_free_buffer;
@@ -501,7 +499,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		break;
 	}
 
-	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
+	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO)) {
 		err = DRIVER_ERROR << 24;
 		goto error;
 	}
@@ -538,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	struct request *rq;
 	int err;
 
-	rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;

+ 0 - 51
drivers/ata/libata-eh.c

@@ -500,57 +500,6 @@ void ata_eh_release(struct ata_port *ap)
 	mutex_unlock(&ap->host->eh_mutex);
 }
 
-/**
- *	ata_scsi_timed_out - SCSI layer time out callback
- *	@cmd: timed out SCSI command
- *
- *	Handles SCSI layer timeout.  We race with normal completion of
- *	the qc for @cmd.  If the qc is already gone, we lose and let
- *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
- *	timed out and EH should be invoked.  Prevent ata_qc_complete()
- *	from finishing it by setting EH_SCHEDULED and return
- *	EH_NOT_HANDLED.
- *
- *	TODO: kill this function once old EH is gone.
- *
- *	LOCKING:
- *	Called from timer context
- *
- *	RETURNS:
- *	EH_HANDLED or EH_NOT_HANDLED
- */
-enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
-{
-	struct Scsi_Host *host = cmd->device->host;
-	struct ata_port *ap = ata_shost_to_port(host);
-	unsigned long flags;
-	struct ata_queued_cmd *qc;
-	enum blk_eh_timer_return ret;
-
-	DPRINTK("ENTER\n");
-
-	if (ap->ops->error_handler) {
-		ret = BLK_EH_NOT_HANDLED;
-		goto out;
-	}
-
-	ret = BLK_EH_HANDLED;
-	spin_lock_irqsave(ap->lock, flags);
-	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	if (qc) {
-		WARN_ON(qc->scsicmd != cmd);
-		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
-		qc->err_mask |= AC_ERR_TIMEOUT;
-		ret = BLK_EH_NOT_HANDLED;
-	}
-	spin_unlock_irqrestore(ap->lock, flags);
-
- out:
-	DPRINTK("EXIT, ret=%d\n", ret);
-	return ret;
-}
-EXPORT_SYMBOL(ata_scsi_timed_out);
-
 static void ata_eh_unload(struct ata_port *ap)
 {
 	struct ata_link *link;

+ 3 - 8
drivers/block/DAC960.c

@@ -1179,7 +1179,6 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
 
   if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
 	return DAC960_Failure(Controller, "DMA mask out of range");
-  Controller->BounceBufferLimit = DMA_BIT_MASK(32);
 
   if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) {
     CommandMailboxesSize =  0;
@@ -1380,11 +1379,8 @@ static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T
   dma_addr_t	CommandMailboxDMA;
   DAC960_V2_CommandStatus_T CommandStatus;
 
-	if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)))
-		Controller->BounceBufferLimit = DMA_BIT_MASK(64);
-	else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
-		Controller->BounceBufferLimit = DMA_BIT_MASK(32);
-	else
+	if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) &&
+	    pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
 		return DAC960_Failure(Controller, "DMA mask out of range");
 
   /* This is a temporary dma mapping, used only in the scope of this function */
@@ -2540,7 +2536,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
 		continue;
   	}
   	Controller->RequestQueue[n] = RequestQueue;
-  	blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
   	RequestQueue->queuedata = Controller;
 	blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
 	blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
@@ -6594,7 +6589,7 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
 					 DAC960_ProcDirectoryEntry);
 	proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
 	proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
-	proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+	proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
 	Controller->ControllerProcEntry = ControllerProcEntry;
 }
 

+ 0 - 1
drivers/block/DAC960.h

@@ -2295,7 +2295,6 @@ typedef struct DAC960_Controller
   unsigned short MaxBlocksPerCommand;
   unsigned short ControllerScatterGatherLimit;
   unsigned short DriverScatterGatherLimit;
-  u64		BounceBufferLimit;
   unsigned int CombinedStatusBufferLength;
   unsigned int InitialStatusLength;
   unsigned int CurrentStatusLength;

+ 5 - 6
drivers/block/aoe/aoeblk.c

@@ -159,14 +159,14 @@ static int aoe_debugfs_open(struct inode *inode, struct file *file)
 	return single_open(file, aoedisk_debugfs_show, inode->i_private);
 }
 
-static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
-static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
-static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
+static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
+static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
+static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
 static struct device_attribute dev_attr_firmware_version = {
-	.attr = { .name = "firmware-version", .mode = S_IRUGO },
+	.attr = { .name = "firmware-version", .mode = 0444 },
 	.show = aoedisk_show_fwver,
 };
-static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
+static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
 
 static struct attribute *aoe_attrs[] = {
 	&dev_attr_state.attr,
@@ -388,7 +388,6 @@ aoeblk_gdalloc(void *vp)
 			d->aoemajor, d->aoeminor);
 		goto err_mempool;
 	}
-	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	spin_lock_irqsave(&d->lock, flags);
 	WARN_ON(!(d->flags & DEVFL_GD_NOW));

+ 2 - 1
drivers/block/aoe/aoecmd.c

@@ -1032,8 +1032,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
 	iter.bi_size = cnt;
 
 	__bio_for_each_segment(bv, bio, iter, iter) {
-		char *p = page_address(bv.bv_page) + bv.bv_offset;
+		char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
 		skb_copy_bits(skb, soff, p, bv.bv_len);
+		kunmap_atomic(p);
 		soff += bv.bv_len;
 	}
 }

+ 7 - 3
drivers/block/brd.c

@@ -331,15 +331,15 @@ static const struct block_device_operations brd_fops = {
  * And now the modules code and kernel interface.
  */
 static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, S_IRUGO);
+module_param(rd_nr, int, 0444);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
 
 unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-module_param(rd_size, ulong, S_IRUGO);
+module_param(rd_size, ulong, 0444);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
 
 static int max_part = 1;
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
 
 MODULE_LICENSE("GPL");
@@ -402,6 +402,10 @@ static struct brd_device *brd_alloc(int i)
 	set_capacity(disk, rd_size * 2);
 	disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
 
+	/* Tell the block layer that this is not a rotational device */
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+
 	return brd;
 
 out_free_queue:

+ 3 - 2
drivers/block/drbd/drbd_bitmap.c

@@ -977,7 +977,7 @@ static void drbd_bm_endio(struct bio *bio)
 	bm_page_unlock_io(device, idx);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES)
-		mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
+		mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
 
 	bio_put(bio);
 
@@ -1014,7 +1014,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
+		page = mempool_alloc(&drbd_md_io_page_pool,
+				GFP_NOIO | __GFP_HIGHMEM);
 		copy_highpage(page, b->bm_pages[page_nr]);
 		bm_store_page_idx(page, page_nr);
 	} else
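
Two things change in bm_page_io_async(): the mempool is now embedded and passed by address, and the allocation mask is spelled GFP_NOIO | __GFP_HIGHMEM instead of __GFP_RECLAIM | __GFP_HIGHMEM, making it explicit that this path must not recurse into the I/O stack. A minimal sketch of the allocation pattern, assuming pool was set up with mempool_init_page_pool():

    #include <linux/mempool.h>
    #include <linux/highmem.h>

    static void copy_page_via_pool(mempool_t *pool, struct page *src)
    {
        /* GFP_NOIO still allows direct reclaim, so mempool_alloc() will
         * not fail here; __GFP_HIGHMEM lets it hand back a highmem page. */
        struct page *page = mempool_alloc(pool, GFP_NOIO | __GFP_HIGHMEM);

        copy_highpage(page, src);
        /* ... hand the copy to the I/O machinery in real code ... */
        mempool_free(page, pool);
    }
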

+ 10 - 10
drivers/block/drbd/drbd_debugfs.c

@@ -481,9 +481,9 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource)
 		goto fail;
 	resource->debugfs_res_connections = dentry;
 
-	dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
-			resource->debugfs_res, resource,
-			&in_flight_summary_fops);
+	dentry = debugfs_create_file("in_flight_summary", 0440,
+				     resource->debugfs_res, resource,
+				     &in_flight_summary_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	resource->debugfs_res_in_flight_summary = dentry;
@@ -645,16 +645,16 @@ void drbd_debugfs_connection_add(struct drbd_connection *connection)
 		goto fail;
 	connection->debugfs_conn = dentry;
 
-	dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
-			connection->debugfs_conn, connection,
-			&connection_callback_history_fops);
+	dentry = debugfs_create_file("callback_history", 0440,
+				     connection->debugfs_conn, connection,
+				     &connection_callback_history_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	connection->debugfs_conn_callback_history = dentry;
 
-	dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
-			connection->debugfs_conn, connection,
-			&connection_oldest_requests_fops);
+	dentry = debugfs_create_file("oldest_requests", 0440,
+				     connection->debugfs_conn, connection,
+				     &connection_oldest_requests_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	connection->debugfs_conn_oldest_requests = dentry;
@@ -824,7 +824,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
 	device->debugfs_minor = dentry;
 
 #define DCF(name)	do {					\
-	dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP,	\
+	dentry = debugfs_create_file(#name, 0440,	\
 			device->debugfs_vol, device,		\
 			&device_ ## name ## _fops);		\
 	if (IS_ERR_OR_NULL(dentry))				\

+ 5 - 5
drivers/block/drbd/drbd_int.h

@@ -1405,8 +1405,8 @@ extern struct kmem_cache *drbd_request_cache;
 extern struct kmem_cache *drbd_ee_cache;	/* peer requests */
 extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
-extern mempool_t *drbd_request_mempool;
-extern mempool_t *drbd_ee_mempool;
+extern mempool_t drbd_request_mempool;
+extern mempool_t drbd_ee_mempool;
 
 /* drbd's page pool, used to buffer data received from the peer,
  * or data requested by the peer.
@@ -1432,16 +1432,16 @@ extern wait_queue_head_t drbd_pp_wait;
  * 128 should be plenty, currently we probably can get away with as few as 1.
  */
 #define DRBD_MIN_POOL_PAGES	128
-extern mempool_t *drbd_md_io_page_pool;
+extern mempool_t drbd_md_io_page_pool;
 
 /* We also need to make sure we get a bio
  * when we need it for housekeeping purposes */
-extern struct bio_set *drbd_md_io_bio_set;
+extern struct bio_set drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
 /* And a bio_set for cloning */
-extern struct bio_set *drbd_io_bio_set;
+extern struct bio_set drbd_io_bio_set;
 
 extern struct mutex resources_mutex;
 

+ 26 - 47
drivers/block/drbd/drbd_main.c

@@ -124,11 +124,11 @@ struct kmem_cache *drbd_request_cache;
 struct kmem_cache *drbd_ee_cache;	/* peer requests */
 struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
-mempool_t *drbd_request_mempool;
-mempool_t *drbd_ee_mempool;
-mempool_t *drbd_md_io_page_pool;
-struct bio_set *drbd_md_io_bio_set;
-struct bio_set *drbd_io_bio_set;
+mempool_t drbd_request_mempool;
+mempool_t drbd_ee_mempool;
+mempool_t drbd_md_io_page_pool;
+struct bio_set drbd_md_io_bio_set;
+struct bio_set drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -153,10 +153,10 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask)
 {
 	struct bio *bio;
 
-	if (!drbd_md_io_bio_set)
+	if (!bioset_initialized(&drbd_md_io_bio_set))
 		return bio_alloc(gfp_mask, 1);
 
-	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
 	if (!bio)
 		return NULL;
 	return bio;
@@ -2097,16 +2097,11 @@ static void drbd_destroy_mempools(void)
 
 	/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
-	if (drbd_io_bio_set)
-		bioset_free(drbd_io_bio_set);
-	if (drbd_md_io_bio_set)
-		bioset_free(drbd_md_io_bio_set);
-	if (drbd_md_io_page_pool)
-		mempool_destroy(drbd_md_io_page_pool);
-	if (drbd_ee_mempool)
-		mempool_destroy(drbd_ee_mempool);
-	if (drbd_request_mempool)
-		mempool_destroy(drbd_request_mempool);
+	bioset_exit(&drbd_io_bio_set);
+	bioset_exit(&drbd_md_io_bio_set);
+	mempool_exit(&drbd_md_io_page_pool);
+	mempool_exit(&drbd_ee_mempool);
+	mempool_exit(&drbd_request_mempool);
 	if (drbd_ee_cache)
 		kmem_cache_destroy(drbd_ee_cache);
 	if (drbd_request_cache)
@@ -2116,11 +2111,6 @@ static void drbd_destroy_mempools(void)
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);
 
-	drbd_io_bio_set      = NULL;
-	drbd_md_io_bio_set   = NULL;
-	drbd_md_io_page_pool = NULL;
-	drbd_ee_mempool      = NULL;
-	drbd_request_mempool = NULL;
 	drbd_ee_cache        = NULL;
 	drbd_request_cache   = NULL;
 	drbd_bm_ext_cache    = NULL;
@@ -2133,18 +2123,7 @@ static int drbd_create_mempools(void)
 {
 	struct page *page;
 	const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
-	int i;
-
-	/* prepare our caches and mempools */
-	drbd_request_mempool = NULL;
-	drbd_ee_cache        = NULL;
-	drbd_request_cache   = NULL;
-	drbd_bm_ext_cache    = NULL;
-	drbd_al_ext_cache    = NULL;
-	drbd_pp_pool         = NULL;
-	drbd_md_io_page_pool = NULL;
-	drbd_md_io_bio_set   = NULL;
-	drbd_io_bio_set      = NULL;
+	int i, ret;
 
 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -2168,26 +2147,26 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
-	drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (drbd_io_bio_set == NULL)
+	ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret)
 		goto Enomem;
 
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
-					   BIOSET_NEED_BVECS);
-	if (drbd_md_io_bio_set == NULL)
+	ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0,
+			  BIOSET_NEED_BVECS);
+	if (ret)
 		goto Enomem;
 
-	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
-	if (drbd_md_io_page_pool == NULL)
+	ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0);
+	if (ret)
 		goto Enomem;
 
-	drbd_request_mempool = mempool_create_slab_pool(number,
-		drbd_request_cache);
-	if (drbd_request_mempool == NULL)
+	ret = mempool_init_slab_pool(&drbd_request_mempool, number,
+				     drbd_request_cache);
+	if (ret)
 		goto Enomem;
 
-	drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache);
-	if (drbd_ee_mempool == NULL)
+	ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache);
+	if (ret)
 		goto Enomem;
 
 	/* drbd's page pool */
@@ -3010,7 +2989,7 @@ static int __init drbd_init(void)
 		goto fail;
 
 	err = -ENOMEM;
-	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
+	drbd_proc = proc_create_data("drbd", S_IFREG | 0444 , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		pr_err("unable to register proc file\n");
 		goto fail;
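
The drbd_main.c conversion is the template for the embedded bio_set/mempool work in this series: the objects live inside the driver (or its per-device structure) instead of being allocated separately, bioset_init()/mempool_init_*() return an errno, and bioset_exit()/mempool_exit() are safe on objects that were never initialized, which is why the NULL checks and pointer resets disappear. A minimal sketch, with hypothetical names rather than drbd's:

    #include <linux/bio.h>
    #include <linux/mempool.h>

    static struct bio_set my_bio_set;
    static mempool_t my_page_pool;

    static int my_create_pools(void)
    {
        int ret;

        ret = bioset_init(&my_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        if (ret)
            return ret;

        ret = mempool_init_page_pool(&my_page_pool, 128, 0);
        if (ret) {
            bioset_exit(&my_bio_set);
            return ret;
        }
        return 0;
    }

    static void my_destroy_pools(void)
    {
        /* both are no-ops on never-initialized objects, so one teardown
         * path covers every partial-failure case */
        mempool_exit(&my_page_pool);
        bioset_exit(&my_bio_set);
    }
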

+ 3 - 3
drivers/block/drbd/drbd_receiver.c

@@ -378,7 +378,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
 		return NULL;
 
-	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
 	if (!peer_req) {
 		if (!(gfp_mask & __GFP_NOWARN))
 			drbd_err(device, "%s: allocation failed\n", __func__);
@@ -409,7 +409,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 	return peer_req;
 
  fail:
-	mempool_free(peer_req, drbd_ee_mempool);
+	mempool_free(peer_req, &drbd_ee_mempool);
 	return NULL;
 }
 
@@ -426,7 +426,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *
 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
 		drbd_al_complete_io(device, &peer_req->i);
 	}
-	mempool_free(peer_req, drbd_ee_mempool);
+	mempool_free(peer_req, &drbd_ee_mempool);
 }
 
 int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)

+ 2 - 2
drivers/block/drbd/drbd_req.c

@@ -55,7 +55,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 {
 	struct drbd_request *req;
 
-	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+	req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
 	if (!req)
 		return NULL;
 	memset(req, 0, sizeof(*req));
@@ -184,7 +184,7 @@ void drbd_req_destroy(struct kref *kref)
 		}
 	}
 
-	mempool_free(req, drbd_request_mempool);
+	mempool_free(req, &drbd_request_mempool);
 }
 
 static void wake_all_senders(struct drbd_connection *connection)

+ 1 - 1
drivers/block/drbd/drbd_req.h

@@ -269,7 +269,7 @@ enum drbd_req_state_bits {
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
 	struct bio *bio;
-	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
+	bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
 
 	req->private_bio = bio;
 

+ 1 - 1
drivers/block/floppy.c

@@ -4450,7 +4450,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
 	return sprintf(buf, "%X\n", UDP->cmos);
 }
 
-static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
 
 static struct attribute *floppy_dev_attrs[] = {
 	&dev_attr_cmos.attr,

+ 9 - 8
drivers/block/loop.c

@@ -732,7 +732,7 @@ static ssize_t loop_attr_do_show_##_name(struct device *d,		\
 	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
 }									\
 static struct device_attribute loop_attr_##_name =			\
-	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+	__ATTR(_name, 0444, loop_attr_do_show_##_name, NULL);
 
 static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
 {
@@ -809,16 +809,17 @@ static struct attribute_group loop_attribute_group = {
 	.attrs= loop_attrs,
 };
 
-static int loop_sysfs_init(struct loop_device *lo)
+static void loop_sysfs_init(struct loop_device *lo)
 {
-	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
-				  &loop_attribute_group);
+	lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+						&loop_attribute_group);
 }
 
 static void loop_sysfs_exit(struct loop_device *lo)
 {
-	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
-			   &loop_attribute_group);
+	if (lo->sysfs_inited)
+		sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+				   &loop_attribute_group);
 }
 
 static void loop_config_discard(struct loop_device *lo)
@@ -1677,9 +1678,9 @@ static const struct block_device_operations lo_fops = {
  * And now the modules code and kernel interface.
  */
 static int max_loop;
-module_param(max_loop, int, S_IRUGO);
+module_param(max_loop, int, 0444);
 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);

+ 1 - 0
drivers/block/loop.h

@@ -58,6 +58,7 @@ struct loop_device {
 	struct kthread_worker	worker;
 	struct task_struct	*worker_task;
 	bool			use_dio;
+	bool			sysfs_inited;
 
 	struct request_queue	*lo_queue;
 	struct blk_mq_tag_set	tag_set;
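
The loop changes replace an ignored sysfs_create_group() return value with a recorded one: loop_sysfs_init() notes whether the attribute group was actually created, and loop_sysfs_exit() only removes it in that case, avoiding a warning when creation failed. A sketch of the pattern with a hypothetical device structure:

    #include <linux/device.h>
    #include <linux/sysfs.h>

    struct mydev {
        struct device *dev;
        const struct attribute_group *grp;
        bool sysfs_inited;
    };

    static void mydev_sysfs_init(struct mydev *md)
    {
        /* tolerate failure; just remember whether the group exists */
        md->sysfs_inited = !sysfs_create_group(&md->dev->kobj, md->grp);
    }

    static void mydev_sysfs_exit(struct mydev *md)
    {
        if (md->sysfs_inited)
            sysfs_remove_group(&md->dev->kobj, md->grp);
    }
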

+ 10 - 19
drivers/block/mtip32xx/mtip32xx.c

@@ -2285,7 +2285,7 @@ static ssize_t mtip_hw_show_status(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);
 
 /* debugsfs entries */
 
@@ -2566,10 +2566,9 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
 		return -1;
 	}
 
-	debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
-							&mtip_flags_fops);
-	debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
-							&mtip_regs_fops);
+	debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
+	debugfs_create_file("registers", 0444, dd->dfs_node, dd,
+			    &mtip_regs_fops);
 
 	return 0;
 }
@@ -2726,15 +2725,11 @@ static void mtip_softirq_done_fn(struct request *rq)
 	blk_mq_end_request(rq, cmd->status);
 }
 
-static void mtip_abort_cmd(struct request *req, void *data,
-							bool reserved)
+static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
-	if (!blk_mq_request_started(req))
-		return;
-
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2742,14 +2737,10 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	mtip_softirq_done_fn(req);
 }
 
-static void mtip_queue_cmd(struct request *req, void *data,
-							bool reserved)
+static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
 {
 	struct driver_data *dd = data;
 
-	if (!blk_mq_request_started(req))
-		return;
-
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 }
@@ -3720,7 +3711,8 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
 		cmd->status = BLK_STS_TIMEOUT;
-		return BLK_EH_HANDLED;
+		blk_mq_complete_request(req);
+		return BLK_EH_DONE;
 	}
 
 	if (test_bit(req->tag, dd->port->cmds_to_issue))
@@ -3862,7 +3854,6 @@ skip_create_disk:
 	blk_queue_max_hw_sectors(dd->queue, 0xffff);
 	blk_queue_max_segment_size(dd->queue, 0x400000);
 	blk_queue_io_min(dd->queue, 4096);
-	blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
 
 	/* Signal trim support */
 	if (dd->trim_supp == true) {
@@ -4273,7 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 	if (!dd->isr_workq) {
 		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
 		rv = -ENOMEM;
-		goto block_initialize_err;
+		goto setmask_err;
 	}
 
 	memset(cpu_list, 0, sizeof(cpu_list));
@@ -4614,7 +4605,7 @@ static int __init mtip_init(void)
 	}
 	if (dfs_parent) {
 		dfs_device_status = debugfs_create_file("device_status",
-					S_IRUGO, dfs_parent, NULL,
+					0444, dfs_parent, NULL,
 					&mtip_device_status_fops);
 		if (IS_ERR_OR_NULL(dfs_device_status)) {
 			pr_err("Error creating device_status node\n");

+ 52 - 25
drivers/block/nbd.c

@@ -166,16 +166,19 @@ static ssize_t pid_show(struct device *dev,
 }
 
 static const struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO},
+	.attr = { .name = "pid", .mode = 0444},
 	.show = pid_show,
 };
 
 static void nbd_dev_remove(struct nbd_device *nbd)
 {
 	struct gendisk *disk = nbd->disk;
+	struct request_queue *q;
+
 	if (disk) {
+		q = disk->queue;
 		del_gendisk(disk);
-		blk_cleanup_queue(disk->queue);
+		blk_cleanup_queue(q);
 		blk_mq_free_tag_set(&nbd->tag_set);
 		disk->private_data = NULL;
 		put_disk(disk);
@@ -213,7 +216,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
 	}
 	if (!nsock->dead) {
 		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
-		atomic_dec(&nbd->config->live_connections);
+		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
+			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+					       &nbd->config->runtime_flags)) {
+				set_bit(NBD_DISCONNECTED,
+					&nbd->config->runtime_flags);
+				dev_info(nbd_to_dev(nbd),
+					"Disconnected due to user request.\n");
+			}
+		}
 	}
 	nsock->dead = true;
 	nsock->pending = NULL;
@@ -231,9 +242,22 @@ static void nbd_size_clear(struct nbd_device *nbd)
 static void nbd_size_update(struct nbd_device *nbd)
 {
 	struct nbd_config *config = nbd->config;
+	struct block_device *bdev = bdget_disk(nbd->disk, 0);
+
+	if (config->flags & NBD_FLAG_SEND_TRIM) {
+		nbd->disk->queue->limits.discard_granularity = config->blksize;
+		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
+	}
 	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
 	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
 	set_capacity(nbd->disk, config->bytesize >> 9);
+	if (bdev) {
+		if (bdev->bd_disk)
+			bd_set_size(bdev, config->bytesize);
+		else
+			bdev->bd_invalidated = 1;
+		bdput(bdev);
+	}
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
@@ -243,6 +267,8 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
 	struct nbd_config *config = nbd->config;
 	config->blksize = blocksize;
 	config->bytesize = blocksize * nr_blocks;
+	if (nbd->task_recv != NULL)
+		nbd_size_update(nbd);
 }
 
 static void nbd_complete_rq(struct request *req)
@@ -286,13 +312,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
-		return BLK_EH_HANDLED;
+		goto done;
 	}
 	config = nbd->config;
 
 	if (config->num_connections > 1) {
 		dev_err_ratelimited(nbd_to_dev(nbd),
-				    "Connection timed out, retrying\n");
+				    "Connection timed out, retrying (%d/%d alive)\n",
+				    atomic_read(&config->live_connections),
+				    config->num_connections);
 		/*
 		 * Hooray we have more connections, requeue this IO, the submit
 		 * path will put it on a real connection.
@@ -314,7 +342,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 			}
 			blk_mq_requeue_request(req, true);
 			nbd_config_put(nbd);
-			return BLK_EH_NOT_HANDLED;
+			return BLK_EH_DONE;
 		}
 	} else {
 		dev_err_ratelimited(nbd_to_dev(nbd),
@@ -324,8 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	cmd->status = BLK_STS_IOERR;
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
-
-	return BLK_EH_HANDLED;
+done:
+	blk_mq_complete_request(req);
+	return BLK_EH_DONE;
 }
 
 /*
@@ -647,11 +676,8 @@ static void recv_work(struct work_struct *work)
 
 static void nbd_clear_req(struct request *req, void *data, bool reserved)
 {
-	struct nbd_cmd *cmd;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-	if (!blk_mq_request_started(req))
-		return;
-	cmd = blk_mq_rq_to_pdu(req);
 	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 }
@@ -714,10 +740,9 @@ static int wait_for_reconnect(struct nbd_device *nbd)
 		return 0;
 	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
 		return 0;
-	wait_event_timeout(config->conn_wait,
-			   atomic_read(&config->live_connections),
-			   config->dead_conn_timeout);
-	return atomic_read(&config->live_connections);
+	return wait_event_timeout(config->conn_wait,
+				  atomic_read(&config->live_connections) > 0,
+				  config->dead_conn_timeout) > 0;
 }
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -950,10 +975,6 @@ static void nbd_bdev_reset(struct block_device *bdev)
 	if (bdev->bd_openers > 1)
 		return;
 	bd_set_size(bdev, 0);
-	if (max_part > 0) {
-		blkdev_reread_part(bdev);
-		bdev->bd_invalidated = 1;
-	}
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1040,6 +1061,8 @@ static void nbd_config_put(struct nbd_device *nbd)
 		nbd->config = NULL;
 
 		nbd->tag_set.timeout = 0;
+		nbd->disk->queue->limits.discard_granularity = 0;
+		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 
 		mutex_unlock(&nbd->config_lock);
@@ -1109,7 +1132,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	if (ret)
 		return ret;
 
-	bd_set_size(bdev, config->bytesize);
 	if (max_part)
 		bdev->bd_invalidated = 1;
 	mutex_unlock(&nbd->config_lock);
@@ -1118,7 +1140,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
 	if (ret)
 		sock_shutdown(nbd);
 	mutex_lock(&nbd->config_lock);
-	bd_set_size(bdev, 0);
+	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
 	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
 		ret = 0;
@@ -1269,6 +1291,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
 		refcount_set(&nbd->config_refs, 1);
 		refcount_inc(&nbd->refs);
 		mutex_unlock(&nbd->config_lock);
+		bdev->bd_invalidated = 1;
+	} else if (nbd_disconnected(nbd->config)) {
+		bdev->bd_invalidated = 1;
 	}
 out:
 	mutex_unlock(&nbd_index_mutex);
@@ -1490,8 +1515,8 @@ static int nbd_dev_add(int index)
 	 */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
-	disk->queue->limits.discard_granularity = 512;
-	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+	disk->queue->limits.discard_granularity = 0;
+	blk_queue_max_discard_sectors(disk->queue, 0);
 	blk_queue_max_segment_size(disk->queue, UINT_MAX);
 	blk_queue_max_segments(disk->queue, USHRT_MAX);
 	blk_queue_max_hw_sectors(disk->queue, 65536);
@@ -1755,6 +1780,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 	}
 	mutex_lock(&nbd->config_lock);
 	nbd_disconnect(nbd);
+	nbd_clear_sock(nbd);
 	mutex_unlock(&nbd->config_lock);
 	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
@@ -2093,7 +2119,8 @@ static int __init nbd_init(void)
 	if (nbds_max > 1UL << (MINORBITS - part_shift))
 		return -EINVAL;
 	recv_workqueue = alloc_workqueue("knbd-recv",
-					 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					 WQ_UNBOUND, 0);
 	if (!recv_workqueue)
 		return -ENOMEM;
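
Besides the discard and resize fixes, the nbd rework tightens wait_for_reconnect(): wait_event_timeout() returns 0 only when the wait timed out with the condition still false, and a positive value whenever the condition became true, so returning that result directly avoids re-reading the connection count afterwards (by which time the connection may already be dead again). A minimal sketch of the idiom:

    #include <linux/wait.h>
    #include <linux/atomic.h>

    static bool wait_for_live_connection(wait_queue_head_t *wq,
                                         atomic_t *live,
                                         unsigned long timeout)
    {
        /* > 0 means the condition was observed true within the timeout */
        return wait_event_timeout(*wq, atomic_read(live) > 0, timeout) > 0;
    }
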
 

+ 19 - 17
drivers/block/null_blk.c

@@ -157,23 +157,23 @@ enum {
 };
 
 static int g_no_sched;
-module_param_named(no_sched, g_no_sched, int, S_IRUGO);
+module_param_named(no_sched, g_no_sched, int, 0444);
 MODULE_PARM_DESC(no_sched, "No io scheduler");
 
 static int g_submit_queues = 1;
-module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
+module_param_named(submit_queues, g_submit_queues, int, 0444);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
 static int g_home_node = NUMA_NO_NODE;
-module_param_named(home_node, g_home_node, int, S_IRUGO);
+module_param_named(home_node, g_home_node, int, 0444);
 MODULE_PARM_DESC(home_node, "Home node for the device");
 
 #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
 static char g_timeout_str[80];
-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
 
 static char g_requeue_str[80];
-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
+module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
 #endif
 
 static int g_queue_mode = NULL_Q_MQ;
@@ -203,27 +203,27 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
 	.get	= param_get_int,
 };
 
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
 MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
 
 static int g_gb = 250;
-module_param_named(gb, g_gb, int, S_IRUGO);
+module_param_named(gb, g_gb, int, 0444);
 MODULE_PARM_DESC(gb, "Size in GB");
 
 static int g_bs = 512;
-module_param_named(bs, g_bs, int, S_IRUGO);
+module_param_named(bs, g_bs, int, 0444);
 MODULE_PARM_DESC(bs, "Block size (in bytes)");
 
 static int nr_devices = 1;
-module_param(nr_devices, int, S_IRUGO);
+module_param(nr_devices, int, 0444);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
 static bool g_blocking;
-module_param_named(blocking, g_blocking, bool, S_IRUGO);
+module_param_named(blocking, g_blocking, bool, 0444);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
 static bool shared_tags;
-module_param(shared_tags, bool, S_IRUGO);
+module_param(shared_tags, bool, 0444);
 MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
 
 static int g_irqmode = NULL_IRQ_SOFTIRQ;
@@ -239,19 +239,19 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
 	.get	= param_get_int,
 };
 
-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
 MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
 
 static unsigned long g_completion_nsec = 10000;
-module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
+module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
 MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
 
 static int g_hw_queue_depth = 64;
-module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
 
 static bool g_use_per_node_hctx;
-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
 static struct nullb_device *null_alloc_dev(void);
@@ -1365,7 +1365,8 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	return BLK_EH_HANDLED;
+	blk_mq_complete_request(rq);
+	return BLK_EH_DONE;
 }
 
 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
@@ -1427,7 +1428,8 @@ static void null_request_fn(struct request_queue *q)
 static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	return BLK_EH_HANDLED;
+	blk_mq_complete_request(rq);
+	return BLK_EH_DONE;
 }
 
 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,

+ 1 - 1
drivers/block/paride/pd.c

@@ -740,7 +740,7 @@ static int pd_special_command(struct pd_unit *disk,
 {
 	struct request *rq;
 
-	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
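
The pd.c hunk is the first of many identical conversions below (pktcdvd, sx8, virtio_blk, cdrom, the ide drivers): blk_get_request() now takes blk_mq_req_flags_t rather than a gfp_t, so 0 means "sleep until a request is free", BLK_MQ_REQ_NOWAIT replaces the GFP_NOWAIT callers, and BLK_MQ_REQ_PREEMPT is passed directly instead of going through blk_get_request_flags(). A sketch of the new calling convention, with a hypothetical driver command:

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>
    #include <linux/err.h>

    static int mydrv_issue_internal_cmd(struct request_queue *q, bool can_sleep)
    {
        struct request *rq;

        rq = blk_get_request(q, REQ_OP_DRV_IN,
                             can_sleep ? 0 : BLK_MQ_REQ_NOWAIT);
        if (IS_ERR(rq))
            return PTR_ERR(rq);

        /* ... fill in the driver-private command here ... */

        blk_execute_rq(q, NULL, rq, 0);
        blk_put_request(rq);
        return 0;
    }
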
 

+ 30 - 30
drivers/block/pktcdvd.c

@@ -97,8 +97,8 @@ static int pktdev_major;
 static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
-static mempool_t *psd_pool;
-static struct bio_set *pkt_bio_set;
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
 
 static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -478,8 +478,8 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
 	if (!pd->dfs_d_root)
 		return;
 
-	pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
-				pd->dfs_d_root, pd, &debug_fops);
+	pd->dfs_f_info = debugfs_create_file("info", 0444,
+					     pd->dfs_d_root, pd, &debug_fops);
 }
 
 static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
@@ -631,7 +631,7 @@ static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
 static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
 {
 	rb_erase(&node->rb_node, &pd->bio_queue);
-	mempool_free(node, pd->rb_pool);
+	mempool_free(node, &pd->rb_pool);
 	pd->bio_queue_size--;
 	BUG_ON(pd->bio_queue_size < 0);
 }
@@ -704,13 +704,13 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	int ret = 0;
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
-			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
-				      __GFP_RECLAIM);
+				      GFP_NOIO);
 		if (ret)
 			goto out;
 	}
@@ -1285,7 +1285,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	 * Fill-in bvec with data from orig_bios.
 	 */
 	spin_lock(&pkt->lock);
-	bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
+	bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
 
 	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
 	spin_unlock(&pkt->lock);
@@ -2303,14 +2303,14 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 	psd->bio->bi_status = bio->bi_status;
 	bio_put(bio);
 	bio_endio(psd->bio);
-	mempool_free(psd, psd_pool);
+	mempool_free(psd, &psd_pool);
 	pkt_bio_finished(pd);
 }
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
-	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
 	psd->bio = bio;
@@ -2381,7 +2381,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
 	/*
 	 * No matching packet found. Store the bio in the work queue.
 	 */
-	node = mempool_alloc(pd->rb_pool, GFP_NOIO);
+	node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
 	node->bio = bio;
 	spin_lock(&pd->lock);
 	BUG_ON(pd->bio_queue_size < 0);
@@ -2451,7 +2451,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 			split = bio_split(bio, last_zone -
 					  bio->bi_iter.bi_sector,
-					  GFP_NOIO, pkt_bio_set);
+					  GFP_NOIO, &pkt_bio_set);
 			bio_chain(split, bio);
 		} else {
 			split = bio;
@@ -2707,9 +2707,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	if (!pd)
 		goto out_mutex;
 
-	pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE,
-						  sizeof(struct pkt_rb_node));
-	if (!pd->rb_pool)
+	ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+					sizeof(struct pkt_rb_node));
+	if (ret)
 		goto out_mem;
 
 	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
@@ -2766,7 +2766,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 out_mem2:
 	put_disk(disk);
 out_mem:
-	mempool_destroy(pd->rb_pool);
+	mempool_exit(&pd->rb_pool);
 	kfree(pd);
 out_mutex:
 	mutex_unlock(&ctl_mutex);
@@ -2817,7 +2817,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	blk_cleanup_queue(pd->disk->queue);
 	put_disk(pd->disk);
 
-	mempool_destroy(pd->rb_pool);
+	mempool_exit(&pd->rb_pool);
 	kfree(pd);
 
 	/* This is safe: open() is still holding a reference. */
@@ -2914,14 +2914,14 @@ static int __init pkt_init(void)
 
 	mutex_init(&ctl_mutex);
 
-	psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
-					sizeof(struct packet_stacked_data));
-	if (!psd_pool)
-		return -ENOMEM;
-	pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!pkt_bio_set) {
-		mempool_destroy(psd_pool);
-		return -ENOMEM;
+	ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+				    sizeof(struct packet_stacked_data));
+	if (ret)
+		return ret;
+	ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret) {
+		mempool_exit(&psd_pool);
+		return ret;
 	}
 
 	ret = register_blkdev(pktdev_major, DRIVER_NAME);
@@ -2954,8 +2954,8 @@ out_misc:
 out:
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
-	mempool_destroy(psd_pool);
-	bioset_free(pkt_bio_set);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
 	return ret;
 }
 
@@ -2968,8 +2968,8 @@ static void __exit pkt_exit(void)
 	pkt_sysfs_cleanup();
 
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
-	mempool_destroy(psd_pool);
-	bioset_free(pkt_bio_set);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");

+ 0 - 2
drivers/block/ps3disk.c

@@ -465,8 +465,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 	priv->queue = queue;
 	queue->queuedata = dev;
 
-	blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
-
 	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);

+ 22 - 22
drivers/block/rbd.c

@@ -424,7 +424,7 @@ static struct workqueue_struct *rbd_wq;
  * single-major requires >= 0.75 version of userspace rbd utility.
  */
 static bool single_major = true;
-module_param(single_major, bool, S_IRUGO);
+module_param(single_major, bool, 0444);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
@@ -468,11 +468,11 @@ static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
 	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
 }
 
-static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
-static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
-static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
-static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
-static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
+static BUS_ATTR(add, 0200, NULL, rbd_add);
+static BUS_ATTR(remove, 0200, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL);
 
 static struct attribute *rbd_bus_attrs[] = {
 	&bus_attr_add.attr,
@@ -4204,22 +4204,22 @@ static ssize_t rbd_image_refresh(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
-static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
-static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
-static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
-static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL);
-static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL);
-static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
-static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
-static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
-static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
-static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
-static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
+static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
+static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
+static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
+static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
+static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
+static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
+static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
+static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,

+ 3 - 3
drivers/block/rsxx/core.c

@@ -247,19 +247,19 @@ static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
 	if (IS_ERR_OR_NULL(card->debugfs_dir))
 		goto failed_debugfs_dir;
 
-	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+	debugfs_stats = debugfs_create_file("stats", 0444,
 					    card->debugfs_dir, card,
 					    &debugfs_stats_fops);
 	if (IS_ERR_OR_NULL(debugfs_stats))
 		goto failed_debugfs_stats;
 
-	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+	debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
 					       card->debugfs_dir, card,
 					       &debugfs_pci_regs_fops);
 	if (IS_ERR_OR_NULL(debugfs_pci_regs))
 		goto failed_debugfs_pci_regs;
 
-	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+	debugfs_cram = debugfs_create_file("cram", 0644,
 					   card->debugfs_dir, card,
 					   &debugfs_cram_fops);
 	if (IS_ERR_OR_NULL(debugfs_cram))

+ 1 - 1
drivers/block/sx8.c

@@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host)
 	if (!crq)
 		return NULL;
 
-	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL);
+	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(rq)) {
 		spin_lock_irqsave(&host->lock, flags);
 		carm_put_request(host, crq);

+ 4 - 4
drivers/block/virtio_blk.c

@@ -298,7 +298,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	struct request *req;
 	int err;
 
-	req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
+	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev,
 	return err;
 }
 
-static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
 
 /* The queue's logical block size must be set before calling this */
 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
@@ -576,10 +576,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
 }
 
 static const struct device_attribute dev_attr_cache_type_ro =
-	__ATTR(cache_type, S_IRUGO,
+	__ATTR(cache_type, 0444,
 	       virtblk_cache_type_show, NULL);
 static const struct device_attribute dev_attr_cache_type_rw =
-	__ATTR(cache_type, S_IRUGO|S_IWUSR,
+	__ATTR(cache_type, 0644,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,

+ 1 - 1
drivers/block/xen-blkback/blkback.c

@@ -98,7 +98,7 @@ MODULE_PARM_DESC(max_queues,
  * backend, 4KB page granularity is used.
  */
 unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 /*
  * The LRU mechanism to clean the lists of persistent grants needs to

+ 2 - 2
drivers/block/xen-blkback/xenbus.c

@@ -367,7 +367,7 @@ int __init xen_blkif_interface_init(void)
 out:									\
 		return sprintf(buf, format, result);			\
 	}								\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW_ALLRING(oo_req,  "%llu\n");
 VBD_SHOW_ALLRING(rd_req,  "%llu\n");
@@ -403,7 +403,7 @@ static const struct attribute_group xen_vbdstat_group = {
 									\
 		return sprintf(buf, format, ##args);			\
 	}								\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
 VBD_SHOW(mode, "%s\n", be->mode);

+ 3 - 4
drivers/block/xen-blkfront.c

@@ -129,13 +129,12 @@ static const struct block_device_operations xlvbd_block_fops;
  */
 
 static unsigned int xen_blkif_max_segments = 32;
-module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
-		   S_IRUGO);
+module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
 MODULE_PARM_DESC(max_indirect_segments,
 		 "Maximum amount of segments in indirect requests (default is 32)");
 
 static unsigned int xen_blkif_max_queues = 4;
-module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+module_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
 MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
 
 /*
@@ -143,7 +142,7 @@ MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per v
  * backend, 4KB page granularity is used.
  */
 static unsigned int xen_blkif_max_ring_order;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
 #define BLK_RING_SIZE(info)	\

+ 1 - 1
drivers/cdrom/cdrom.c

@@ -2192,7 +2192,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 
 		len = nr * CD_FRAMESIZE_RAW;
 
-		rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+		rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
 			break;

+ 1 - 1
drivers/ide/ide-atapi.c

@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	struct request *rq;
 	int error;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = (char *)pc;
 

+ 1 - 1
drivers/ide/ide-cd.c

@@ -437,7 +437,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		bool delay = false;
 
 		rq = blk_get_request(drive->queue,
-			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,  __GFP_RECLAIM);
+			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 		ide_req(rq)->type = ATA_PRIV_PC;
 		rq->rq_flags |= rq_flags;

+ 1 - 1
drivers/ide/ide-cd_ioctl.c

@@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	struct request *rq;
 	int ret;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
 	blk_execute_rq(drive->queue, cd->disk, rq, 0);

+ 1 - 1
drivers/ide/ide-devsets.c

@@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 	if (!(setting->flags & DS_SYNC))
 		return setting->set(drive, arg);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 5;
 	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;

+ 1 - 1
drivers/ide/ide-disk.c

@@ -478,7 +478,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	drive->mult_req = arg;

+ 2 - 2
drivers/ide/ide-ioctls.c

@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 	if (NULL == (void *) arg) {
 		struct request *rq;
 
-		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
 		blk_execute_rq(drive->queue, NULL, rq, 0);
 		err = scsi_req(rq)->result ? -EIO : 0;
@@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive)
 	struct request *rq;
 	int ret = 0;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;

+ 2 - 2
drivers/ide/ide-park.c

@@ -32,7 +32,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	}
 	spin_unlock_irq(&hwif->lock);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
@@ -47,7 +47,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	 * Make sure that *some* command is sent to the drive after the
 	 * timeout has expired, so power management will be reenabled.
 	 */
-	rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(rq))
 		goto out;
 

+ 2 - 3
drivers/ide/ide-pm.c

@@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -90,8 +90,7 @@ int generic_ide_resume(struct device *dev)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN,
-				   BLK_MQ_REQ_PREEMPT);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;

+ 2 - 2
drivers/ide/ide-tape.c

@@ -854,7 +854,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 	BUG_ON(size < 0 || size % tape->blk_size);
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
@@ -862,7 +862,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 
 	if (size) {
 		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-				      __GFP_RECLAIM);
+				      GFP_NOIO);
 		if (ret)
 			goto out_put;
 	}

+ 2 - 2
drivers/ide/ide-taskfile.c

@@ -431,7 +431,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 
 	rq = blk_get_request(drive->queue,
 		(cmd->tf_flags & IDE_TFLAG_WRITE) ?
-			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	/*
@@ -442,7 +442,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	 */
 	if (nsect) {
 		error = blk_rq_map_kern(drive->queue, rq, buf,
-					nsect * SECTOR_SIZE, __GFP_RECLAIM);
+					nsect * SECTOR_SIZE, GFP_NOIO);
 		if (error)
 			goto put_req;
 	}

+ 5 - 5
drivers/lightnvm/core.c

@@ -431,7 +431,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	return 0;
 err_sysfs:
 	if (tt->exit)
-		tt->exit(targetdata);
+		tt->exit(targetdata, true);
 err_init:
 	blk_cleanup_queue(tqueue);
 	tdisk->queue = NULL;
@@ -446,7 +446,7 @@ err_reserve:
 	return ret;
 }
 
-static void __nvm_remove_target(struct nvm_target *t)
+static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 {
 	struct nvm_tgt_type *tt = t->type;
 	struct gendisk *tdisk = t->disk;
@@ -459,7 +459,7 @@ static void __nvm_remove_target(struct nvm_target *t)
 		tt->sysfs_exit(tdisk);
 
 	if (tt->exit)
-		tt->exit(tdisk->private_data);
+		tt->exit(tdisk->private_data, graceful);
 
 	nvm_remove_tgt_dev(t->dev, 1);
 	put_disk(tdisk);
@@ -489,7 +489,7 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
 		mutex_unlock(&dev->mlock);
 		return 1;
 	}
-	__nvm_remove_target(t);
+	__nvm_remove_target(t, true);
 	mutex_unlock(&dev->mlock);
 
 	return 0;
@@ -963,7 +963,7 @@ void nvm_unregister(struct nvm_dev *dev)
 	list_for_each_entry_safe(t, tmp, &dev->targets, list) {
 		if (t->dev->parent != dev)
 			continue;
-		__nvm_remove_target(t);
+		__nvm_remove_target(t, false);
 	}
 	mutex_unlock(&dev->mlock);
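
The extra argument to tt->exit() carries the new graceful-teardown flag: true for a user-requested removal, false when nvm_unregister() tears targets down because the underlying device is going away, so a target can skip flushing state to hardware that is already gone. A sketch of how a target's exit hook might use it, with a hypothetical context structure:

    #include <linux/slab.h>

    struct mytgt_ctx {
        void *buffered_state;
    };

    static void mytgt_exit(void *private, bool graceful)
    {
        struct mytgt_ctx *ctx = private;

        if (graceful) {
            /* device still present: flush buffered data and close open
             * lines/metadata before letting go of the context */
        }
        /* forced or not, release the in-memory state */
        kfree(ctx->buffered_state);
        kfree(ctx);
    }
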
 

+ 6 - 4
drivers/lightnvm/pblk-cache.c

@@ -44,13 +44,15 @@ retry:
 		goto out;
 	}
 
-	if (unlikely(!bio_has_data(bio)))
-		goto out;
-
 	pblk_ppa_set_empty(&w_ctx.ppa);
 	w_ctx.flags = flags;
-	if (bio->bi_opf & REQ_PREFLUSH)
+	if (bio->bi_opf & REQ_PREFLUSH) {
 		w_ctx.flags |= PBLK_FLUSH_ENTRY;
+		pblk_write_kick(pblk);
+	}
+
+	if (unlikely(!bio_has_data(bio)))
+		goto out;
 
 	for (i = 0; i < nr_entries; i++) {
 		void *data = bio_data(bio);

+ 154 - 79
drivers/lightnvm/pblk-core.c

@@ -40,7 +40,7 @@ static void pblk_line_mark_bb(struct work_struct *work)
 	}
 
 	kfree(ppa);
-	mempool_free(line_ws, pblk->gen_ws_pool);
+	mempool_free(line_ws, &pblk->gen_ws_pool);
 }
 
 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
@@ -102,7 +102,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
 	struct pblk *pblk = rqd->private;
 
 	__pblk_end_io_erase(pblk, rqd);
-	mempool_free(rqd, pblk->e_rq_pool);
+	mempool_free(rqd, &pblk->e_rq_pool);
 }
 
 /*
@@ -237,15 +237,15 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
 	switch (type) {
 	case PBLK_WRITE:
 	case PBLK_WRITE_INT:
-		pool = pblk->w_rq_pool;
+		pool = &pblk->w_rq_pool;
 		rq_size = pblk_w_rq_size;
 		break;
 	case PBLK_READ:
-		pool = pblk->r_rq_pool;
+		pool = &pblk->r_rq_pool;
 		rq_size = pblk_g_rq_size;
 		break;
 	default:
-		pool = pblk->e_rq_pool;
+		pool = &pblk->e_rq_pool;
 		rq_size = pblk_g_rq_size;
 	}
 
@@ -265,20 +265,22 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
 	case PBLK_WRITE:
 		kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
 	case PBLK_WRITE_INT:
-		pool = pblk->w_rq_pool;
+		pool = &pblk->w_rq_pool;
 		break;
 	case PBLK_READ:
-		pool = pblk->r_rq_pool;
+		pool = &pblk->r_rq_pool;
 		break;
 	case PBLK_ERASE:
-		pool = pblk->e_rq_pool;
+		pool = &pblk->e_rq_pool;
 		break;
 	default:
 		pr_err("pblk: trying to free unknown rqd type\n");
 		return;
 	}
 
-	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+	if (rqd->meta_list)
+		nvm_dev_dma_free(dev->parent, rqd->meta_list,
+				rqd->dma_meta_list);
 	mempool_free(rqd, pool);
 }
 
@@ -292,7 +294,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
 
 	for (i = off; i < nr_pages + off; i++) {
 		bv = bio->bi_io_vec[i];
-		mempool_free(bv.bv_page, pblk->page_bio_pool);
+		mempool_free(bv.bv_page, &pblk->page_bio_pool);
 	}
 }
 
@@ -304,23 +306,23 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
 	int i, ret;
 
 	for (i = 0; i < nr_pages; i++) {
-		page = mempool_alloc(pblk->page_bio_pool, flags);
+		page = mempool_alloc(&pblk->page_bio_pool, flags);
 
 		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
 		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
 			pr_err("pblk: could not add page to bio\n");
-			mempool_free(page, pblk->page_bio_pool);
+			mempool_free(page, &pblk->page_bio_pool);
 			goto err;
 		}
 	}
 
 	return 0;
 err:
-	pblk_bio_free_pages(pblk, bio, 0, i - 1);
+	pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
 	return -1;
 }
 
-static void pblk_write_kick(struct pblk *pblk)
+void pblk_write_kick(struct pblk *pblk)
 {
 	wake_up_process(pblk->writer_ts);
 	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
@@ -342,13 +344,6 @@ void pblk_write_should_kick(struct pblk *pblk)
 		pblk_write_kick(pblk);
 }
 
-void pblk_end_io_sync(struct nvm_rq *rqd)
-{
-	struct completion *waiting = rqd->private;
-
-	complete(waiting);
-}
-
 static void pblk_wait_for_meta(struct pblk *pblk)
 {
 	do {
@@ -380,7 +375,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 
 	lockdep_assert_held(&line->lock);
 
-	if (!vsc) {
+	if (line->w_err_gc->has_write_err) {
+		if (line->gc_group != PBLK_LINEGC_WERR) {
+			line->gc_group = PBLK_LINEGC_WERR;
+			move_list = &l_mg->gc_werr_list;
+			pblk_rl_werr_line_in(&pblk->rl);
+		}
+	} else if (!vsc) {
 		if (line->gc_group != PBLK_LINEGC_FULL) {
 			line->gc_group = PBLK_LINEGC_FULL;
 			move_list = &l_mg->gc_full_list;
@@ -467,16 +468,13 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-#ifdef CONFIG_NVM_DEBUG
-	int ret;
+	atomic_inc(&pblk->inflight_io);
 
-	ret = pblk_check_io(pblk, rqd);
-	if (ret)
-		return ret;
+#ifdef CONFIG_NVM_DEBUG
+	if (pblk_check_io(pblk, rqd))
+		return NVM_IO_ERR;
 #endif
 
-	atomic_inc(&pblk->inflight_io);
-
 	return nvm_submit_io(dev, rqd);
 }
 
@@ -484,16 +482,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-#ifdef CONFIG_NVM_DEBUG
-	int ret;
+	atomic_inc(&pblk->inflight_io);
 
-	ret = pblk_check_io(pblk, rqd);
-	if (ret)
-		return ret;
+#ifdef CONFIG_NVM_DEBUG
+	if (pblk_check_io(pblk, rqd))
+		return NVM_IO_ERR;
 #endif
 
-	atomic_inc(&pblk->inflight_io);
-
 	return nvm_submit_io_sync(dev, rqd);
 }
 
@@ -856,9 +851,10 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
-		if (dir == PBLK_WRITE)
+		if (dir == PBLK_WRITE) {
 			pblk_log_write_err(pblk, &rqd);
-		else if (dir == PBLK_READ)
+			ret = 1;
+		} else if (dir == PBLK_READ)
 			pblk_log_read_err(pblk, &rqd);
 	}
 
@@ -1071,6 +1067,25 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
 	return 1;
 }
 
+static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+
+	line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!line->map_bitmap)
+		return -ENOMEM;
+
+	/* will be initialized using bb info from map_bitmap */
+	line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!line->invalid_bitmap) {
+		kfree(line->map_bitmap);
+		line->map_bitmap = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /* For now lines are always assumed full lines. Thus, smeta former and current
  * lun bitmaps are omitted.
  */
@@ -1108,7 +1123,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 
 	if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
 		pr_debug("pblk: line smeta I/O failed. Retry\n");
-		return 1;
+		return 0;
 	}
 
 	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -1174,19 +1189,9 @@ static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
 static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = atomic_read(&line->blk_in_line);
 	int blk_to_erase;
 
-	line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
-	if (!line->map_bitmap)
-		return -ENOMEM;
-
-	/* will be initialized using bb info from map_bitmap */
-	line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC);
-	if (!line->invalid_bitmap) {
-		kfree(line->map_bitmap);
-		return -ENOMEM;
-	}
-
 	/* Bad blocks do not need to be erased */
 	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
 
@@ -1199,16 +1204,19 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 		blk_to_erase = pblk_prepare_new_line(pblk, line);
 		line->state = PBLK_LINESTATE_FREE;
 	} else {
-		blk_to_erase = atomic_read(&line->blk_in_line);
+		blk_to_erase = blk_in_line;
 	}
 
-	if (line->state != PBLK_LINESTATE_FREE) {
-		kfree(line->map_bitmap);
-		kfree(line->invalid_bitmap);
+	if (blk_in_line < lm->min_blk_line) {
 		spin_unlock(&line->lock);
+		return -EAGAIN;
+	}
+
+	if (line->state != PBLK_LINESTATE_FREE) {
 		WARN(1, "pblk: corrupted line %d, state %d\n",
 							line->id, line->state);
-		return -EAGAIN;
+		spin_unlock(&line->lock);
+		return -EINTR;
 	}
 
 	line->state = PBLK_LINESTATE_OPEN;
@@ -1241,13 +1249,16 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
 	}
 	spin_unlock(&l_mg->free_lock);
 
-	pblk_rl_free_lines_dec(&pblk->rl, line, true);
+	ret = pblk_line_alloc_bitmaps(pblk, line);
+	if (ret)
+		return ret;
 
 	if (!pblk_line_init_bb(pblk, line, 0)) {
 		list_add(&line->list, &l_mg->free_list);
 		return -EINTR;
 	}
 
+	pblk_rl_free_lines_dec(&pblk->rl, line, true);
 	return 0;
 }
 
@@ -1259,6 +1270,24 @@ void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
 	line->emeta = NULL;
 }
 
+static void pblk_line_reinit(struct pblk_line *line)
+{
+	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
+	line->map_bitmap = NULL;
+	line->invalid_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+void pblk_line_free(struct pblk_line *line)
+{
+	kfree(line->map_bitmap);
+	kfree(line->invalid_bitmap);
+
+	pblk_line_reinit(line);
+}
+
 struct pblk_line *pblk_line_get(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1292,10 +1321,14 @@ retry:
 
 	ret = pblk_line_prepare(pblk, line);
 	if (ret) {
-		if (ret == -EAGAIN) {
+		switch (ret) {
+		case -EAGAIN:
+			list_add(&line->list, &l_mg->bad_list);
+			goto retry;
+		case -EINTR:
 			list_add(&line->list, &l_mg->corrupt_list);
 			goto retry;
-		} else {
+		default:
 			pr_err("pblk: failed to prepare line %d\n", line->id);
 			list_add(&line->list, &l_mg->free_list);
 			l_mg->nr_free_lines++;
@@ -1321,11 +1354,14 @@ retry:
 		return NULL;
 	}
 
+	retry_line->map_bitmap = line->map_bitmap;
+	retry_line->invalid_bitmap = line->invalid_bitmap;
 	retry_line->smeta = line->smeta;
 	retry_line->emeta = line->emeta;
 	retry_line->meta_line = line->meta_line;
 
-	pblk_line_free(pblk, line);
+	pblk_line_reinit(line);
+
 	l_mg->data_line = retry_line;
 	spin_unlock(&l_mg->free_lock);
 
@@ -1378,6 +1414,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 	}
 	spin_unlock(&l_mg->free_lock);
 
+	if (pblk_line_alloc_bitmaps(pblk, line))
+		return NULL;
+
 	if (pblk_line_erase(pblk, line)) {
 		line = pblk_line_retry(pblk, line);
 		if (!line)
@@ -1449,7 +1488,7 @@ static void pblk_line_close_meta_sync(struct pblk *pblk)
 	flush_workqueue(pblk->close_wq);
 }
 
-void pblk_pipeline_stop(struct pblk *pblk)
+void __pblk_pipeline_flush(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	int ret;
@@ -1474,6 +1513,11 @@ void pblk_pipeline_stop(struct pblk *pblk)
 
 	flush_workqueue(pblk->bb_wq);
 	pblk_line_close_meta_sync(pblk);
+}
+
+void __pblk_pipeline_stop(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 
 	spin_lock(&l_mg->free_lock);
 	pblk->state = PBLK_STATE_STOPPED;
@@ -1482,6 +1526,12 @@ void pblk_pipeline_stop(struct pblk *pblk)
 	spin_unlock(&l_mg->free_lock);
 }
 
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+	__pblk_pipeline_flush(pblk);
+	__pblk_pipeline_stop(pblk);
+}
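
The split keeps pblk_pipeline_stop() behaving as before while letting teardown skip the flush when the exit is not graceful; pblk_tear_down() in pblk-init.c further down uses it exactly that way. A minimal sketch of such a caller, with only the wrapper name invented:

/* Sketch: composing the flush/stop split, mirroring pblk_tear_down(). */
#include "pblk.h"

static void example_tear_down(struct pblk *pblk, bool graceful)
{
	if (graceful)
		__pblk_pipeline_flush(pblk);	/* drain writes, sync line metadata */
	__pblk_pipeline_stop(pblk);		/* mark the instance stopped */
}
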
+
 struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1511,6 +1561,9 @@ retry_erase:
 		goto retry_erase;
 	}
 
+	if (pblk_line_alloc_bitmaps(pblk, new))
+		return NULL;
+
 retry_setup:
 	if (!pblk_line_init_metadata(pblk, new, cur)) {
 		new = pblk_line_retry(pblk, new);
@@ -1550,19 +1603,6 @@ out:
 	return new;
 }
 
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
-{
-	kfree(line->map_bitmap);
-	kfree(line->invalid_bitmap);
-
-	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
-
-	line->map_bitmap = NULL;
-	line->invalid_bitmap = NULL;
-	line->smeta = NULL;
-	line->emeta = NULL;
-}
-
 static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1572,9 +1612,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
 	WARN_ON(line->state != PBLK_LINESTATE_GC);
 	line->state = PBLK_LINESTATE_FREE;
 	line->gc_group = PBLK_LINEGC_NONE;
-	pblk_line_free(pblk, line);
-	spin_unlock(&line->lock);
+	pblk_line_free(line);
+
+	if (line->w_err_gc->has_write_err) {
+		pblk_rl_werr_line_out(&pblk->rl);
+		line->w_err_gc->has_write_err = 0;
+	}
 
+	spin_unlock(&line->lock);
 	atomic_dec(&gc->pipeline_gc);
 
 	spin_lock(&l_mg->free_lock);
@@ -1593,7 +1638,7 @@ static void pblk_line_put_ws(struct work_struct *work)
 	struct pblk_line *line = line_put_ws->line;
 
 	__pblk_line_put(pblk, line);
-	mempool_free(line_put_ws, pblk->gen_ws_pool);
+	mempool_free(line_put_ws, &pblk->gen_ws_pool);
 }
 
 void pblk_line_put(struct kref *ref)
@@ -1610,7 +1655,7 @@ void pblk_line_put_wq(struct kref *ref)
 	struct pblk *pblk = line->pblk;
 	struct pblk_line_ws *line_put_ws;
 
-	line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
+	line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
 	if (!line_put_ws)
 		return;
 
@@ -1737,11 +1782,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
 
 	spin_lock(&l_mg->close_lock);
 	spin_lock(&line->lock);
+
+	/* Update the in-memory start address for emeta, in case it has
+	 * shifted due to write errors
+	 */
+	if (line->emeta_ssec != line->cur_sec)
+		line->emeta_ssec = line->cur_sec;
+
 	list_add_tail(&line->list, &l_mg->emeta_list);
 	spin_unlock(&line->lock);
 	spin_unlock(&l_mg->close_lock);
 
 	pblk_line_should_sync_meta(pblk);
+}
+
+static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	unsigned int lba_list_size = lm->emeta_len[2];
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+	struct pblk_emeta *emeta = line->emeta;
+
+	w_err_gc->lba_list = pblk_malloc(lba_list_size,
+					 l_mg->emeta_alloc_type, GFP_KERNEL);
+	memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
+				lba_list_size);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1750,9 +1818,16 @@ void pblk_line_close_ws(struct work_struct *work)
 									ws);
 	struct pblk *pblk = line_ws->pblk;
 	struct pblk_line *line = line_ws->line;
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
+	/* Write errors make the emeta start address stored in smeta invalid,
+	 * so keep a copy of the lba list until we've gc'd the line
+	 */
+	if (w_err_gc->has_write_err)
+		pblk_save_lba_list(pblk, line);
 
 	pblk_line_close(pblk, line);
-	mempool_free(line_ws, pblk->gen_ws_pool);
+	mempool_free(line_ws, &pblk->gen_ws_pool);
 }
 
 void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
@@ -1761,7 +1836,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 {
 	struct pblk_line_ws *line_ws;
 
-	line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask);
+	line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
 
 	line_ws->pblk = pblk;
 	line_ws->line = line;

+ 71 - 41
drivers/lightnvm/pblk-gc.c

@@ -129,6 +129,53 @@ out:
 	kfree(gc_rq_ws);
 }
 
+static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
+				       struct pblk_line *line)
+{
+	struct line_emeta *emeta_buf;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	unsigned int lba_list_size = lm->emeta_len[2];
+	__le64 *lba_list;
+	int ret;
+
+	emeta_buf = pblk_malloc(lm->emeta_len[0],
+				l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (!emeta_buf)
+		return NULL;
+
+	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n",
+				line->id, ret);
+		pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+		return NULL;
+	}
+
+	/* If this read fails, it means that emeta is corrupted.
+	 * For now, leave the line untouched.
+	 * TODO: Implement a recovery routine that scans and moves
+	 * all sectors on the line.
+	 */
+
+	ret = pblk_recov_check_emeta(pblk, emeta_buf);
+	if (ret) {
+		pr_err("pblk: inconsistent emeta (line %d)\n",
+				line->id);
+		pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+		return NULL;
+	}
+
+	lba_list = pblk_malloc(lba_list_size,
+			       l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (lba_list)
+		memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
+
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+
+	return lba_list;
+}
+
 static void pblk_gc_line_prepare_ws(struct work_struct *work)
 {
 	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
@@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_gc *gc = &pblk->gc;
-	struct line_emeta *emeta_buf;
 	struct pblk_line_ws *gc_rq_ws;
 	struct pblk_gc_rq *gc_rq;
 	__le64 *lba_list;
 	unsigned long *invalid_bitmap;
 	int sec_left, nr_secs, bit;
-	int ret;
 
 	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
 	if (!invalid_bitmap)
 		goto fail_free_ws;
 
-	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
-								GFP_KERNEL);
-	if (!emeta_buf) {
-		pr_err("pblk: cannot use GC emeta\n");
-		goto fail_free_bitmap;
-	}
-
-	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
-	if (ret) {
-		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-		goto fail_free_emeta;
-	}
-
-	/* If this read fails, it means that emeta is corrupted. For now, leave
-	 * the line untouched. TODO: Implement a recovery routine that scans and
-	 * moves all sectors on the line.
-	 */
-
-	ret = pblk_recov_check_emeta(pblk, emeta_buf);
-	if (ret) {
-		pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
-		goto fail_free_emeta;
-	}
-
-	lba_list = emeta_to_lbas(pblk, emeta_buf);
-	if (!lba_list) {
-		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-		goto fail_free_emeta;
+	if (line->w_err_gc->has_write_err) {
+		lba_list = line->w_err_gc->lba_list;
+		line->w_err_gc->lba_list = NULL;
+	} else {
+		lba_list = get_lba_list_from_emeta(pblk, line);
+		if (!lba_list) {
+			pr_err("pblk: could not interpret emeta (line %d)\n",
+					line->id);
+			goto fail_free_ws;
+		}
 	}
 
 	spin_lock(&line->lock);
@@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
 
 	if (sec_left < 0) {
 		pr_err("pblk: corrupted GC line (%d)\n", line->id);
-		goto fail_free_emeta;
+		goto fail_free_lba_list;
 	}
 
 	bit = -1;
 next_rq:
 	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
 	if (!gc_rq)
-		goto fail_free_emeta;
+		goto fail_free_lba_list;
 
 	nr_secs = 0;
 	do {
@@ -240,7 +267,7 @@ next_rq:
 		goto next_rq;
 
 out:
-	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
 	kfree(line_ws);
 	kfree(invalid_bitmap);
 
@@ -251,9 +278,8 @@ out:
 
 fail_free_gc_rq:
 	kfree(gc_rq);
-fail_free_emeta:
-	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
-fail_free_bitmap:
+fail_free_lba_list:
+	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
 	kfree(invalid_bitmap);
 fail_free_ws:
 	kfree(line_ws);
@@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
 static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
 {
 	unsigned int nr_blocks_free, nr_blocks_need;
+	unsigned int werr_lines = atomic_read(&rl->werr_lines);
 
 	nr_blocks_need = pblk_rl_high_thrs(rl);
 	nr_blocks_free = pblk_rl_nr_free_blks(rl);
 
 	/* This is not critical, no need to take lock here */
-	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
+	return ((werr_lines > 0) ||
+		((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
 }
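
With the werr_lines counter in the mix, the GC thread now wakes even when free-block pressure is low, purely to rewrite lines that saw write errors. A standalone restatement of the trigger above with simplified types (an illustration, not the pblk function):

#include <stdbool.h>

static bool gc_should_run(bool gc_active, unsigned int blocks_free,
			  unsigned int blocks_need, unsigned int werr_lines)
{
	/* Run GC if write-error lines are queued, or if free blocks run
	 * low while GC is active. */
	return werr_lines > 0 || (gc_active && blocks_need > blocks_free);
}
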
 
 void pblk_gc_free_full_lines(struct pblk *pblk)
@@ -649,7 +677,7 @@ fail_free_main_kthread:
 	return ret;
 }
 
-void pblk_gc_exit(struct pblk *pblk)
+void pblk_gc_exit(struct pblk *pblk, bool graceful)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
@@ -663,10 +691,12 @@ void pblk_gc_exit(struct pblk *pblk)
 	if (gc->gc_reader_ts)
 		kthread_stop(gc->gc_reader_ts);
 
-	flush_workqueue(gc->gc_reader_wq);
-	destroy_workqueue(gc->gc_reader_wq);
+	if (graceful) {
+		flush_workqueue(gc->gc_reader_wq);
+		flush_workqueue(gc->gc_line_reader_wq);
+	}
 
-	flush_workqueue(gc->gc_line_reader_wq);
+	destroy_workqueue(gc->gc_reader_wq);
 	destroy_workqueue(gc->gc_line_reader_wq);
 
 	if (gc->gc_writer_ts)

+ 105 - 67
drivers/lightnvm/pblk-init.c

@@ -20,10 +20,15 @@
 
 #include "pblk.h"
 
+unsigned int write_buffer_size;
+
+module_param(write_buffer_size, uint, 0644);
+MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
+
 static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
 				*pblk_w_rq_cache;
 static DECLARE_RWSEM(pblk_lock);
-struct bio_set *pblk_bio_set;
+struct bio_set pblk_bio_set;
 
 static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 			  struct bio *bio)
@@ -127,10 +132,8 @@ static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
 	if (!line) {
 		/* Configure next line for user data */
 		line = pblk_line_get_first_data(pblk);
-		if (!line) {
-			pr_err("pblk: line list corrupted\n");
+		if (!line)
 			return -EFAULT;
-		}
 	}
 
 	return 0;
@@ -141,6 +144,7 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
 	sector_t i;
 	struct ppa_addr ppa;
 	size_t map_size;
+	int ret = 0;
 
 	map_size = pblk_trans_map_size(pblk);
 	pblk->trans_map = vmalloc(map_size);
@@ -152,7 +156,11 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
 	for (i = 0; i < pblk->rl.nr_secs; i++)
 		pblk_trans_map_set(pblk, i, ppa);
 
-	return pblk_l2p_recover(pblk, factory_init);
+	ret = pblk_l2p_recover(pblk, factory_init);
+	if (ret)
+		vfree(pblk->trans_map);
+
+	return ret;
 }
 
 static void pblk_rwb_free(struct pblk *pblk)
@@ -169,10 +177,15 @@ static int pblk_rwb_init(struct pblk *pblk)
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_rb_entry *entries;
-	unsigned long nr_entries;
+	unsigned long nr_entries, buffer_size;
 	unsigned int power_size, power_seg_sz;
 
-	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
+	if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer))
+		buffer_size = write_buffer_size;
+	else
+		buffer_size = pblk->pgs_in_buffer;
+
+	nr_entries = pblk_rb_calculate_size(buffer_size);
 
 	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
 	if (!entries)
@@ -341,7 +354,7 @@ static int pblk_core_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	int max_write_ppas;
+	int ret, max_write_ppas;
 
 	atomic64_set(&pblk->user_wa, 0);
 	atomic64_set(&pblk->pad_wa, 0);
@@ -375,33 +388,33 @@ static int pblk_core_init(struct pblk *pblk)
 		goto fail_free_pad_dist;
 
 	/* Internal bios can be at most the sectors signaled by the device. */
-	pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
-	if (!pblk->page_bio_pool)
+	ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
+	if (ret)
 		goto free_global_caches;
 
-	pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
-							pblk_ws_cache);
-	if (!pblk->gen_ws_pool)
+	ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
+				     pblk_ws_cache);
+	if (ret)
 		goto free_page_bio_pool;
 
-	pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_rec_cache);
-	if (!pblk->rec_pool)
+	ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
+				     pblk_rec_cache);
+	if (ret)
 		goto free_gen_ws_pool;
 
-	pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_g_rq_cache);
-	if (!pblk->r_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
+				     pblk_g_rq_cache);
+	if (ret)
 		goto free_rec_pool;
 
-	pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_g_rq_cache);
-	if (!pblk->e_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
+				     pblk_g_rq_cache);
+	if (ret)
 		goto free_r_rq_pool;
 
-	pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_w_rq_cache);
-	if (!pblk->w_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
+				     pblk_w_rq_cache);
+	if (ret)
 		goto free_e_rq_pool;
 
 	pblk->close_wq = alloc_workqueue("pblk-close-wq",
@@ -423,6 +436,7 @@ static int pblk_core_init(struct pblk *pblk)
 		goto free_r_end_wq;
 
 	INIT_LIST_HEAD(&pblk->compl_list);
+	INIT_LIST_HEAD(&pblk->resubmit_list);
 
 	return 0;
 
@@ -433,17 +447,17 @@ free_bb_wq:
 free_close_wq:
 	destroy_workqueue(pblk->close_wq);
 free_w_rq_pool:
-	mempool_destroy(pblk->w_rq_pool);
+	mempool_exit(&pblk->w_rq_pool);
 free_e_rq_pool:
-	mempool_destroy(pblk->e_rq_pool);
+	mempool_exit(&pblk->e_rq_pool);
 free_r_rq_pool:
-	mempool_destroy(pblk->r_rq_pool);
+	mempool_exit(&pblk->r_rq_pool);
 free_rec_pool:
-	mempool_destroy(pblk->rec_pool);
+	mempool_exit(&pblk->rec_pool);
 free_gen_ws_pool:
-	mempool_destroy(pblk->gen_ws_pool);
+	mempool_exit(&pblk->gen_ws_pool);
 free_page_bio_pool:
-	mempool_destroy(pblk->page_bio_pool);
+	mempool_exit(&pblk->page_bio_pool);
 free_global_caches:
 	pblk_free_global_caches(pblk);
 fail_free_pad_dist:
@@ -462,12 +476,12 @@ static void pblk_core_free(struct pblk *pblk)
 	if (pblk->bb_wq)
 		destroy_workqueue(pblk->bb_wq);
 
-	mempool_destroy(pblk->page_bio_pool);
-	mempool_destroy(pblk->gen_ws_pool);
-	mempool_destroy(pblk->rec_pool);
-	mempool_destroy(pblk->r_rq_pool);
-	mempool_destroy(pblk->e_rq_pool);
-	mempool_destroy(pblk->w_rq_pool);
+	mempool_exit(&pblk->page_bio_pool);
+	mempool_exit(&pblk->gen_ws_pool);
+	mempool_exit(&pblk->rec_pool);
+	mempool_exit(&pblk->r_rq_pool);
+	mempool_exit(&pblk->e_rq_pool);
+	mempool_exit(&pblk->w_rq_pool);
 
 	pblk_free_global_caches(pblk);
 	kfree(pblk->pad_dist);
@@ -489,11 +503,17 @@ static void pblk_line_mg_free(struct pblk *pblk)
 	}
 }
 
-static void pblk_line_meta_free(struct pblk_line *line)
+static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
+				struct pblk_line *line)
 {
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
 	kfree(line->blk_bitmap);
 	kfree(line->erase_bitmap);
 	kfree(line->chks);
+
+	pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
+	kfree(w_err_gc);
 }
 
 static void pblk_lines_free(struct pblk *pblk)
@@ -506,8 +526,8 @@ static void pblk_lines_free(struct pblk *pblk)
 	for (i = 0; i < l_mg->nr_lines; i++) {
 		line = &pblk->lines[i];
 
-		pblk_line_free(pblk, line);
-		pblk_line_meta_free(line);
+		pblk_line_free(line);
+		pblk_line_meta_free(l_mg, line);
 	}
 	spin_unlock(&l_mg->free_lock);
 
@@ -748,14 +768,14 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
 		chunk->cnlb = chunk_meta->cnlb;
 		chunk->wp = chunk_meta->wp;
 
-		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
-			continue;
-
 		if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
 			WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
 			continue;
 		}
 
+		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+			continue;
+
 		set_bit(pos, line->blk_bitmap);
 		nr_bad_chks++;
 	}
@@ -809,20 +829,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
 		return -ENOMEM;
 
 	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
-	if (!line->erase_bitmap) {
-		kfree(line->blk_bitmap);
-		return -ENOMEM;
-	}
+	if (!line->erase_bitmap)
+		goto free_blk_bitmap;
 
 	line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
 								GFP_KERNEL);
-	if (!line->chks) {
-		kfree(line->erase_bitmap);
-		kfree(line->blk_bitmap);
-		return -ENOMEM;
-	}
+	if (!line->chks)
+		goto free_erase_bitmap;
+
+	line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
+	if (!line->w_err_gc)
+		goto free_chks;
 
 	return 0;
+
+free_chks:
+	kfree(line->chks);
+free_erase_bitmap:
+	kfree(line->erase_bitmap);
+free_blk_bitmap:
+	kfree(line->blk_bitmap);
+	return -ENOMEM;
 }
 
 static int pblk_line_mg_init(struct pblk *pblk)
@@ -847,12 +875,14 @@ static int pblk_line_mg_init(struct pblk *pblk)
 	INIT_LIST_HEAD(&l_mg->gc_mid_list);
 	INIT_LIST_HEAD(&l_mg->gc_low_list);
 	INIT_LIST_HEAD(&l_mg->gc_empty_list);
+	INIT_LIST_HEAD(&l_mg->gc_werr_list);
 
 	INIT_LIST_HEAD(&l_mg->emeta_list);
 
-	l_mg->gc_lists[0] = &l_mg->gc_high_list;
-	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
-	l_mg->gc_lists[2] = &l_mg->gc_low_list;
+	l_mg->gc_lists[0] = &l_mg->gc_werr_list;
+	l_mg->gc_lists[1] = &l_mg->gc_high_list;
+	l_mg->gc_lists[2] = &l_mg->gc_mid_list;
+	l_mg->gc_lists[3] = &l_mg->gc_low_list;
 
 	spin_lock_init(&l_mg->free_lock);
 	spin_lock_init(&l_mg->close_lock);
@@ -1047,6 +1077,11 @@ static int pblk_lines_init(struct pblk *pblk)
 		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
 	}
 
+	if (!nr_free_chks) {
+		pr_err("pblk: too many bad blocks, cannot create a sane instance\n");
+		return -EINTR;
+	}
+
 	pblk_set_provision(pblk, nr_free_chks);
 
 	kfree(chunk_meta);
@@ -1054,7 +1089,7 @@ static int pblk_lines_init(struct pblk *pblk)
 
 fail_free_lines:
 	while (--i >= 0)
-		pblk_line_meta_free(&pblk->lines[i]);
+		pblk_line_meta_free(l_mg, &pblk->lines[i]);
 	kfree(pblk->lines);
 fail_free_chunk_meta:
 	kfree(chunk_meta);
@@ -1110,23 +1145,25 @@ static void pblk_free(struct pblk *pblk)
 	kfree(pblk);
 }
 
-static void pblk_tear_down(struct pblk *pblk)
+static void pblk_tear_down(struct pblk *pblk, bool graceful)
 {
-	pblk_pipeline_stop(pblk);
+	if (graceful)
+		__pblk_pipeline_flush(pblk);
+	__pblk_pipeline_stop(pblk);
 	pblk_writer_stop(pblk);
 	pblk_rb_sync_l2p(&pblk->rwb);
 	pblk_rl_free(&pblk->rl);
 
-	pr_debug("pblk: consistent tear down\n");
+	pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful);
 }
 
-static void pblk_exit(void *private)
+static void pblk_exit(void *private, bool graceful)
 {
 	struct pblk *pblk = private;
 
 	down_write(&pblk_lock);
-	pblk_gc_exit(pblk);
-	pblk_tear_down(pblk);
+	pblk_gc_exit(pblk, graceful);
+	pblk_tear_down(pblk, graceful);
 
 #ifdef CONFIG_NVM_DEBUG
 	pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
@@ -1175,6 +1212,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	pblk->state = PBLK_STATE_RUNNING;
 	pblk->gc.gc_enabled = 0;
 
+	spin_lock_init(&pblk->resubmit_lock);
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
 
@@ -1297,18 +1335,18 @@ static int __init pblk_module_init(void)
 {
 	int ret;
 
-	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!pblk_bio_set)
-		return -ENOMEM;
+	ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret)
+		return ret;
 	ret = nvm_register_tgt_type(&tt_pblk);
 	if (ret)
-		bioset_free(pblk_bio_set);
+		bioset_exit(&pblk_bio_set);
 	return ret;
 }
 
 static void pblk_module_exit(void)
 {
-	bioset_free(pblk_bio_set);
+	bioset_exit(&pblk_bio_set);
 	nvm_unregister_tgt_type(&tt_pblk);
 }
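
The init/exit pair above shows the general shape of the conversion applied throughout these hunks, from heap-allocated bio_set and mempool objects to embedded ones: bioset_init() and the mempool_init_*() helpers return an errno instead of a pointer, and teardown becomes bioset_exit()/mempool_exit(). A minimal standalone module following the same shape; the object type, slab cache and all "example_" names are invented for illustration:

/* Sketch of the embedded bio_set/mempool pattern used in these hunks. */
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct example_obj {
	int payload;
};

static struct bio_set example_bio_set;	/* embedded, not a pointer */
static mempool_t example_pool;		/* embedded, not a pointer */
static struct kmem_cache *example_cache;

static int __init example_init(void)
{
	int ret;

	example_cache = KMEM_CACHE(example_obj, 0);
	if (!example_cache)
		return -ENOMEM;

	ret = bioset_init(&example_bio_set, BIO_POOL_SIZE, 0, 0);
	if (ret)
		goto free_cache;

	ret = mempool_init_slab_pool(&example_pool, 16, example_cache);
	if (ret)
		goto free_bioset;

	return 0;

free_bioset:
	bioset_exit(&example_bio_set);
free_cache:
	kmem_cache_destroy(example_cache);
	return ret;
}

static void __exit example_exit(void)
{
	mempool_exit(&example_pool);
	bioset_exit(&example_bio_set);
	kmem_cache_destroy(example_cache);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
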
 

+ 24 - 9
drivers/lightnvm/pblk-map.c

@@ -18,11 +18,11 @@
 
 #include "pblk.h"
 
-static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
-			       struct ppa_addr *ppa_list,
-			       unsigned long *lun_bitmap,
-			       struct pblk_sec_meta *meta_list,
-			       unsigned int valid_secs)
+static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
+			      struct ppa_addr *ppa_list,
+			      unsigned long *lun_bitmap,
+			      struct pblk_sec_meta *meta_list,
+			      unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
 	struct pblk_emeta *emeta;
@@ -35,8 +35,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 	if (pblk_line_is_full(line)) {
 		struct pblk_line *prev_line = line;
 
+		/* If we cannot allocate a new line, make sure to store metadata
+		 * on the current line and then fail
+		 */
 		line = pblk_line_replace_data(pblk);
 		pblk_line_close_meta(pblk, prev_line);
+
+		if (!line)
+			return -EINTR;
 	}
 
 	emeta = line->emeta;
@@ -74,6 +80,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 	}
 
 	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
+	return 0;
 }
 
 void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
@@ -87,8 +94,12 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 
 	for (i = off; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs);
+		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs)) {
+			bio_put(rqd->bio);
+			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+			pblk_pipeline_stop(pblk);
+		}
 	}
 }
 
@@ -108,8 +119,12 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
 	for (i = 0; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs);
+		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs)) {
+			bio_put(rqd->bio);
+			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+			pblk_pipeline_stop(pblk);
+		}
 
 		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
 

+ 4 - 44
drivers/lightnvm/pblk-rb.c

@@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx)
 {
 	int flags;
 
-try:
 	flags = READ_ONCE(w_ctx->flags);
-	if (!(flags & PBLK_SUBMITTED_ENTRY))
-		goto try;
+	WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
+			"pblk: overwriting unsubmitted data\n");
 
 	/* Release flags on context. Protect from writes and reads */
 	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
@@ -350,7 +349,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
 }
 
 static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
-				  unsigned int pos)
+				   unsigned int pos)
 {
 	struct pblk_rb_entry *entry;
 	unsigned int sync, flush_point;
@@ -420,7 +419,7 @@ void pblk_rb_flush(struct pblk_rb *rb)
 	if (pblk_rb_flush_point_set(rb, NULL, mem))
 		return;
 
-	pblk_write_should_kick(pblk);
+	pblk_write_kick(pblk);
 }
 
 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
@@ -503,45 +502,6 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
 	return 1;
 }
 
-/*
- * The caller of this function must ensure that the backpointer will not
- * overwrite the entries passed on the list.
- */
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
-				      struct list_head *list,
-				      unsigned int max)
-{
-	struct pblk_rb_entry *entry, *tentry;
-	struct page *page;
-	unsigned int read = 0;
-	int ret;
-
-	list_for_each_entry_safe(entry, tentry, list, index) {
-		if (read > max) {
-			pr_err("pblk: too many entries on list\n");
-			goto out;
-		}
-
-		page = virt_to_page(entry->data);
-		if (!page) {
-			pr_err("pblk: could not allocate write bio page\n");
-			goto out;
-		}
-
-		ret = bio_add_page(bio, page, rb->seg_size, 0);
-		if (ret != rb->seg_size) {
-			pr_err("pblk: could not add page to write bio\n");
-			goto out;
-		}
-
-		list_del(&entry->index);
-		read++;
-	}
-
-out:
-	return read;
-}
-
 /*
  * Read available entries on rb and add them to the given bio. To avoid a memory
  * copy, a page reference to the write buffer is added to the bio instead.

+ 88 - 58
drivers/lightnvm/pblk-read.c

@@ -39,10 +39,10 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
 }
 
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
-				 sector_t blba, unsigned long *read_bitmap)
+				 struct bio *bio, sector_t blba,
+				 unsigned long *read_bitmap)
 {
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
-	struct bio *bio = rqd->bio;
 	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
 	int nr_secs = rqd->nr_ppas;
 	bool advanced_bio = false;
@@ -102,32 +102,69 @@ next:
 #endif
 }
 
-static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
+static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
+				sector_t blba)
 {
-	int err;
+	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+	int nr_lbas = rqd->nr_ppas;
+	int i;
 
-	err = pblk_submit_io(pblk, rqd);
-	if (err)
-		return NVM_IO_ERR;
+	for (i = 0; i < nr_lbas; i++) {
+		u64 lba = le64_to_cpu(meta_lba_list[i].lba);
+
+		if (lba == ADDR_EMPTY)
+			continue;
+
+		if (lba != blba + i) {
+#ifdef CONFIG_NVM_DEBUG
+			struct ppa_addr *p;
 
-	return NVM_IO_OK;
+			p = (nr_lbas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[i];
+			print_ppa(&pblk->dev->geo, p, "seq", i);
+#endif
+			pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+							lba, (u64)blba + i);
+			WARN_ON(1);
+		}
+	}
 }
 
-static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
-			   sector_t blba)
+/*
+ * There can be holes in the lba list.
+ */
+static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
+				 u64 *lba_list, int nr_lbas)
 {
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
-	int nr_lbas = rqd->nr_ppas;
-	int i;
+	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+	int i, j;
 
-	for (i = 0; i < nr_lbas; i++) {
-		u64 lba = le64_to_cpu(meta_list[i].lba);
+	for (i = 0, j = 0; i < nr_lbas; i++) {
+		u64 lba = lba_list[i];
+		u64 meta_lba;
 
 		if (lba == ADDR_EMPTY)
 			continue;
 
-		WARN(lba != blba + i, "pblk: corrupted read LBA\n");
+		meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+
+		if (lba != meta_lba) {
+#ifdef CONFIG_NVM_DEBUG
+			struct ppa_addr *p;
+			int nr_ppas = rqd->nr_ppas;
+
+			p = (nr_ppas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[j];
+			print_ppa(&pblk->dev->geo, p, "seq", j);
+#endif
+			pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+								lba, meta_lba);
+			WARN_ON(1);
+		}
+
+		j++;
 	}
+
+	WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
 }
 
 static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
@@ -152,7 +189,6 @@ static void pblk_end_user_read(struct bio *bio)
 	WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
 #endif
 	bio_endio(bio);
-	bio_put(bio);
 }
 
 static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -160,23 +196,18 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *bio = rqd->bio;
+	struct bio *int_bio = rqd->bio;
 	unsigned long start_time = r_ctx->start_time;
 
 	generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
-#ifdef CONFIG_NVM_DEBUG
-	else
-		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
-#endif
 
-	pblk_read_check(pblk, rqd, r_ctx->lba);
+	pblk_read_check_seq(pblk, rqd, r_ctx->lba);
 
-	bio_put(bio);
-	if (r_ctx->private)
-		pblk_end_user_read((struct bio *)r_ctx->private);
+	if (int_bio)
+		bio_put(int_bio);
 
 	if (put_line)
 		pblk_read_put_rqd_kref(pblk, rqd);
@@ -193,16 +224,19 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
 static void pblk_end_io_read(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
+	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+	struct bio *bio = (struct bio *)r_ctx->private;
 
+	pblk_end_user_read(bio);
 	__pblk_end_io_read(pblk, rqd, true);
 }
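
From here on the user bio never enters the nvm_rq: rqd->bio only ever holds the internal clone created with bio_clone_fast() from the embedded pblk_bio_set (see the hunks below), while the original bio travels in r_ctx->private and is completed exactly once in the completion path above. A rough sketch of that ownership split, assuming a driver-private context struct; all names here are illustrative, not pblk code:

/* Sketch: keep the user bio out of the device request, complete it once. */
#include <linux/bio.h>
#include <linux/gfp.h>

struct example_ctx {
	struct bio *orig;	/* user bio, ended once at completion */
	struct bio *clone;	/* internal bio the driver actually issues */
};

static int example_issue_read(struct bio *bio, struct bio_set *bs,
			      struct example_ctx *ctx)
{
	struct bio *clone = bio_clone_fast(bio, GFP_KERNEL, bs);

	if (!clone)
		return -ENOMEM;

	ctx->orig = bio;	/* original stays out of the device path */
	ctx->clone = clone;
	/* ... submit 'clone' to the device here ... */
	return 0;
}

static void example_end_read(struct example_ctx *ctx, blk_status_t status)
{
	ctx->orig->bi_status = status;
	bio_endio(ctx->orig);	/* complete the user bio exactly once */
	bio_put(ctx->clone);	/* drop the internal clone */
}
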
 
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
-				 unsigned int bio_init_idx,
-				 unsigned long *read_bitmap)
+static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
+			     struct bio *orig_bio, unsigned int bio_init_idx,
+			     unsigned long *read_bitmap)
 {
-	struct bio *new_bio, *bio = rqd->bio;
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct bio *new_bio;
 	struct bio_vec src_bv, dst_bv;
 	void *ppa_ptr = NULL;
 	void *src_p, *dst_p;
@@ -219,11 +253,11 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
 
 	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
-		goto err;
+		goto fail_add_pages;
 
 	if (nr_holes != new_bio->bi_vcnt) {
 		pr_err("pblk: malformed bio\n");
-		goto err;
+		goto fail;
 	}
 
 	for (i = 0; i < nr_secs; i++)
@@ -246,7 +280,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	if (ret) {
 		bio_put(rqd->bio);
 		pr_err("pblk: sync read IO submission failed\n");
-		goto err;
+		goto fail;
 	}
 
 	if (rqd->error) {
@@ -282,7 +316,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 		meta_list[hole].lba = lba_list_media[i];
 
 		src_bv = new_bio->bi_io_vec[i++];
-		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+		dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
 
 		src_p = kmap_atomic(src_bv.bv_page);
 		dst_p = kmap_atomic(dst_bv.bv_page);
@@ -294,35 +328,33 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 		kunmap_atomic(src_p);
 		kunmap_atomic(dst_p);
 
-		mempool_free(src_bv.bv_page, pblk->page_bio_pool);
+		mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
 
 		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
 	} while (hole < nr_secs);
 
 	bio_put(new_bio);
 
-	/* Complete the original bio and associated request */
-	bio_endio(bio);
-	rqd->bio = bio;
+	/* restore original request */
+	rqd->bio = NULL;
 	rqd->nr_ppas = nr_secs;
 
 	__pblk_end_io_read(pblk, rqd, false);
-	return NVM_IO_OK;
-
-err:
-	pr_err("pblk: failed to perform partial read\n");
+	return NVM_IO_DONE;
 
+fail:
 	/* Free allocated pages in new bio */
-	pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
+	pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
+fail_add_pages:
+	pr_err("pblk: failed to perform partial read\n");
 	__pblk_end_io_read(pblk, rqd, false);
 	return NVM_IO_ERR;
 }
 
-static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
 			 sector_t lba, unsigned long *read_bitmap)
 {
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
-	struct bio *bio = rqd->bio;
 	struct ppa_addr ppa;
 
 	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -386,14 +418,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
 
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->bio = bio;
 	rqd->nr_ppas = nr_secs;
+	rqd->bio = NULL; /* cloned bio if needed */
 	rqd->private = pblk;
 	rqd->end_io = pblk_end_io_read;
 
 	r_ctx = nvm_rq_to_pdu(rqd);
 	r_ctx->start_time = jiffies;
 	r_ctx->lba = blba;
+	r_ctx->private = bio; /* original bio */
 
 	/* Save the index for this bio's start. This is needed in case
 	 * we need to fill a partial read.
@@ -411,17 +444,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 		rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
 		rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
-		pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap);
+		pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap);
 	} else {
-		pblk_read_rq(pblk, rqd, blba, &read_bitmap);
+		pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap);
 	}
 
-	bio_get(bio);
 	if (bitmap_full(&read_bitmap, nr_secs)) {
-		bio_endio(bio);
 		atomic_inc(&pblk->inflight_io);
 		__pblk_end_io_read(pblk, rqd, false);
-		return NVM_IO_OK;
+		return NVM_IO_DONE;
 	}
 
 	/* All sectors are to be read from the device */
@@ -429,20 +460,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 		struct bio *int_bio = NULL;
 
 		/* Clone read bio to deal with read errors internally */
-		int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
+		int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
 		if (!int_bio) {
 			pr_err("pblk: could not clone read bio\n");
 			goto fail_end_io;
 		}
 
 		rqd->bio = int_bio;
-		r_ctx->private = bio;
 
-		ret = pblk_submit_read_io(pblk, rqd);
-		if (ret) {
+		if (pblk_submit_io(pblk, rqd)) {
 			pr_err("pblk: read IO submission failed\n");
-			if (int_bio)
-				bio_put(int_bio);
+			ret = NVM_IO_ERR;
 			goto fail_end_io;
 		}
 
@@ -452,7 +480,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	/* The read bio request could be partially filled by the write buffer,
 	 * but there are some holes that need to be read from the drive.
 	 */
-	return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
+	return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
 
 fail_rqd_free:
 	pblk_free_rqd(pblk, rqd, PBLK_READ);
@@ -585,6 +613,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 		goto err_free_bio;
 	}
 
+	pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
+
 	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {

+ 21 - 100
drivers/lightnvm/pblk-recovery.c

@@ -16,97 +16,6 @@
 
 #include "pblk.h"
 
-void pblk_submit_rec(struct work_struct *work)
-{
-	struct pblk_rec_ctx *recovery =
-			container_of(work, struct pblk_rec_ctx, ws_rec);
-	struct pblk *pblk = recovery->pblk;
-	struct nvm_rq *rqd = recovery->rqd;
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *bio;
-	unsigned int nr_rec_secs;
-	unsigned int pgs_read;
-	int ret;
-
-	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
-								NVM_MAX_VLBA);
-
-	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
-
-	bio->bi_iter.bi_sector = 0;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	rqd->bio = bio;
-	rqd->nr_ppas = nr_rec_secs;
-
-	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
-								nr_rec_secs);
-	if (pgs_read != nr_rec_secs) {
-		pr_err("pblk: could not read recovery entries\n");
-		goto err;
-	}
-
-	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
-		pr_err("pblk: could not setup recovery request\n");
-		goto err;
-	}
-
-#ifdef CONFIG_NVM_DEBUG
-	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
-#endif
-
-	ret = pblk_submit_io(pblk, rqd);
-	if (ret) {
-		pr_err("pblk: I/O submission failed: %d\n", ret);
-		goto err;
-	}
-
-	mempool_free(recovery, pblk->rec_pool);
-	return;
-
-err:
-	bio_put(bio);
-	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-}
-
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
-			struct pblk_rec_ctx *recovery, u64 *comp_bits,
-			unsigned int comp)
-{
-	struct nvm_rq *rec_rqd;
-	struct pblk_c_ctx *rec_ctx;
-	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
-
-	rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
-	rec_ctx = nvm_rq_to_pdu(rec_rqd);
-
-	/* Copy completion bitmap, but exclude the first X completed entries */
-	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
-				(unsigned long int *)comp_bits,
-				comp, NVM_MAX_VLBA);
-
-	/* Save the context for the entries that need to be re-written and
-	 * update current context with the completed entries.
-	 */
-	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
-	if (comp >= c_ctx->nr_valid) {
-		rec_ctx->nr_valid = 0;
-		rec_ctx->nr_padded = nr_entries - comp;
-
-		c_ctx->nr_padded = comp - c_ctx->nr_valid;
-	} else {
-		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
-		rec_ctx->nr_padded = c_ctx->nr_padded;
-
-		c_ctx->nr_valid = comp;
-		c_ctx->nr_padded = 0;
-	}
-
-	recovery->rqd = rec_rqd;
-	recovery->pblk = pblk;
-
-	return 0;
-}
-
 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
 	u32 crc;
@@ -865,18 +774,30 @@ static void pblk_recov_wa_counters(struct pblk *pblk,
 }
 
 static int pblk_line_was_written(struct pblk_line *line,
-			    struct pblk_line_meta *lm)
+			    struct pblk *pblk)
 {
 
-	int i;
-	int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct nvm_chk_meta *chunk;
+	struct ppa_addr bppa;
+	int smeta_blk;
 
-	for (i = 0; i < lm->blk_per_line; i++) {
-		if (!(line->chks[i].state & state_mask))
-			return 1;
-	}
+	if (line->state == PBLK_LINESTATE_BAD)
+		return 0;
 
-	return 0;
+	smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (smeta_blk >= lm->blk_per_line)
+		return 0;
+
+	bppa = pblk->luns[smeta_blk].bppa;
+	chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
+
+	if (chunk->state & NVM_CHK_ST_FREE)
+		return 0;
+
+	return 1;
 }
 
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
@@ -915,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 		line->lun_bitmap = ((void *)(smeta_buf)) +
 						sizeof(struct line_smeta);
 
-		if (!pblk_line_was_written(line, lm))
+		if (!pblk_line_was_written(line, pblk))
 			continue;
 
 		/* Lines that cannot be read are assumed as not written here */

+ 25 - 4
drivers/lightnvm/pblk-rl.c

@@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
 	pblk_rl_kick_u_timer(rl);
 }
 
+void pblk_rl_werr_line_in(struct pblk_rl *rl)
+{
+	atomic_inc(&rl->werr_lines);
+}
+
+void pblk_rl_werr_line_out(struct pblk_rl *rl)
+{
+	atomic_dec(&rl->werr_lines);
+}
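
The two counters above feed __pblk_rl_update_rates() in the next hunk: even when free blocks are plentiful, a small slice of the write-buffer budget is handed to GC as long as write-error lines are pending. A standalone restatement of that split; the struct below is a simplification, not the real pblk_rl layout:

struct rl_budget {
	unsigned int rb_budget;		/* total write-buffer budget */
	unsigned int rb_windows_pw;	/* log2 of one GC window */
	unsigned int rb_user_max;
	unsigned int rb_gc_max;
};

static void rl_update_high(struct rl_budget *rl, unsigned int werr_lines)
{
	if (werr_lines) {
		/* Reserve one window for rewriting write-error lines. */
		rl->rb_gc_max = 1U << rl->rb_windows_pw;
		rl->rb_user_max = rl->rb_budget - rl->rb_gc_max;
	} else {
		rl->rb_user_max = rl->rb_budget;
		rl->rb_gc_max = 0;
	}
}
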
+
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
 {
 	atomic_add(nr_entries, &rl->rb_gc_cnt);
@@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
 	int max = rl->rb_budget;
+	int werr_gc_needed = atomic_read(&rl->werr_lines);
 
 	if (free_blocks >= rl->high) {
-		rl->rb_user_max = max;
-		rl->rb_gc_max = 0;
-		rl->rb_state = PBLK_RL_HIGH;
+		if (werr_gc_needed) {
+			/* Allocate a small budget for recovering
+			 * lines with write errors
+			 */
+			rl->rb_gc_max = 1 << rl->rb_windows_pw;
+			rl->rb_user_max = max - rl->rb_gc_max;
+			rl->rb_state = PBLK_RL_WERR;
+		} else {
+			rl->rb_user_max = max;
+			rl->rb_gc_max = 0;
+			rl->rb_state = PBLK_RL_OFF;
+		}
 	} else if (free_blocks < rl->high) {
 		int shift = rl->high_pw - rl->rb_windows_pw;
 		int user_windows = free_blocks >> shift;
@@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
 		rl->rb_state = PBLK_RL_LOW;
 	}
 
-	if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
+	if (rl->rb_state != PBLK_RL_OFF)
 		pblk_gc_should_start(pblk);
 	else
 		pblk_gc_should_stop(pblk);
@@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
 	atomic_set(&rl->rb_user_cnt, 0);
 	atomic_set(&rl->rb_gc_cnt, 0);
 	atomic_set(&rl->rb_space, -1);
+	atomic_set(&rl->werr_lines, 0);
 
 	timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
 

+ 13 - 2
drivers/lightnvm/pblk-sysfs.c

@@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
 	int d_line_cnt = 0, l_line_cnt = 0;
 	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
+	int gc_werr = 0;
+
 	int bad = 0, cor = 0;
 	int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
 	int map_weight = 0, meta_weight = 0;
@@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 		gc_empty++;
 	}
 
+	list_for_each_entry(line, &l_mg->gc_werr_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_werr++;
+	}
+
 	list_for_each_entry(line, &l_mg->bad_list, list)
 		bad++;
 	list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 					l_mg->nr_lines);
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
-			gc_full, gc_high, gc_mid, gc_low, gc_empty,
+		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr:%d, queue:%d\n",
+			gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
 			atomic_read(&pblk->gc.read_inflight_gc));
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,

+ 178 - 91
drivers/lightnvm/pblk-write.c

@@ -103,68 +103,150 @@ retry:
 	pblk_rb_sync_end(&pblk->rwb, &flags);
 }
 
-/* When a write fails, we are not sure whether the block has grown bad or a page
- * range is more susceptible to write errors. If a high number of pages fail, we
- * assume that the block is bad and we mark it accordingly. In all cases, we
- * remap and resubmit the failed entries as fast as possible; if a flush is
- * waiting on a completion, the whole stack would stall otherwise.
- */
-static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+/* Map remaining sectors in chunk, starting from ppa */
+static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
 {
-	void *comp_bits = &rqd->ppa_status;
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-	struct pblk_rec_ctx *recovery;
-	struct ppa_addr *ppa_list = rqd->ppa_list;
-	int nr_ppas = rqd->nr_ppas;
-	unsigned int c_entries;
-	int bit, ret;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *line;
+	struct ppa_addr map_ppa = *ppa;
+	u64 paddr;
+	int done = 0;
 
-	if (unlikely(nr_ppas == 1))
-		ppa_list = &rqd->ppa_addr;
+	line = &pblk->lines[pblk_ppa_to_line(*ppa)];
+	spin_lock(&line->lock);
 
-	recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
+	while (!done)  {
+		paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
 
-	INIT_LIST_HEAD(&recovery->failed);
+		if (!test_and_set_bit(paddr, line->map_bitmap))
+			line->left_msecs--;
 
-	bit = -1;
-	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
-		struct pblk_rb_entry *entry;
-		struct ppa_addr ppa;
+		if (!test_and_set_bit(paddr, line->invalid_bitmap))
+			le32_add_cpu(line->vsc, -1);
 
-		/* Logic error */
-		if (bit > c_ctx->nr_valid) {
-			WARN_ONCE(1, "pblk: corrupted write request\n");
-			mempool_free(recovery, pblk->rec_pool);
-			goto out;
+		if (geo->version == NVM_OCSSD_SPEC_12) {
+			map_ppa.ppa++;
+			if (map_ppa.g.pg == geo->num_pg)
+				done = 1;
+		} else {
+			map_ppa.m.sec++;
+			if (map_ppa.m.sec == geo->clba)
+				done = 1;
 		}
+	}
 
-		ppa = ppa_list[bit];
-		entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
-		if (!entry) {
-			pr_err("pblk: could not scan entry on write failure\n");
-			mempool_free(recovery, pblk->rec_pool);
-			goto out;
-		}
+	line->w_err_gc->has_write_err = 1;
+	spin_unlock(&line->lock);
+}
 
-		/* The list is filled first and emptied afterwards. No need for
-		 * protecting it with a lock
+static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
+				  unsigned int nr_entries)
+{
+	struct pblk_rb *rb = &pblk->rwb;
+	struct pblk_rb_entry *entry;
+	struct pblk_line *line;
+	struct pblk_w_ctx *w_ctx;
+	struct ppa_addr ppa_l2p;
+	int flags;
+	unsigned int pos, i;
+
+	spin_lock(&pblk->trans_lock);
+	pos = sentry;
+	for (i = 0; i < nr_entries; i++) {
+		entry = &rb->entries[pos];
+		w_ctx = &entry->w_ctx;
+
+		/* Check if the lba has been overwritten */
+		ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
+		if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
+			w_ctx->lba = ADDR_EMPTY;
+
+		/* Mark up the entry as submittable again */
+		flags = READ_ONCE(w_ctx->flags);
+		flags |= PBLK_WRITTEN_DATA;
+		/* Release flags on write context. Protect from writes */
+		smp_store_release(&w_ctx->flags, flags);
+
+		/* Decrease the reference count to the line as we will
+		 * re-map these entries
 		 */
-		list_add_tail(&entry->index, &recovery->failed);
+		line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)];
+		kref_put(&line->ref, pblk_line_put);
+
+		pos = (pos + 1) & (rb->nr_entries - 1);
 	}
+	spin_unlock(&pblk->trans_lock);
+}
 
-	c_entries = find_first_bit(comp_bits, nr_ppas);
-	ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
-	if (ret) {
-		pr_err("pblk: could not recover from write failure\n");
-		mempool_free(recovery, pblk->rec_pool);
-		goto out;
+static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
+{
+	struct pblk_c_ctx *r_ctx;
+
+	r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
+	if (!r_ctx)
+		return;
+
+	r_ctx->lun_bitmap = NULL;
+	r_ctx->sentry = c_ctx->sentry;
+	r_ctx->nr_valid = c_ctx->nr_valid;
+	r_ctx->nr_padded = c_ctx->nr_padded;
+
+	spin_lock(&pblk->resubmit_lock);
+	list_add_tail(&r_ctx->list, &pblk->resubmit_list);
+	spin_unlock(&pblk->resubmit_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
+#endif
+}
+
+static void pblk_submit_rec(struct work_struct *work)
+{
+	struct pblk_rec_ctx *recovery =
+			container_of(work, struct pblk_rec_ctx, ws_rec);
+	struct pblk *pblk = recovery->pblk;
+	struct nvm_rq *rqd = recovery->rqd;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct ppa_addr *ppa_list;
+
+	pblk_log_write_err(pblk, rqd);
+
+	if (rqd->nr_ppas == 1)
+		ppa_list = &rqd->ppa_addr;
+	else
+		ppa_list = rqd->ppa_list;
+
+	pblk_map_remaining(pblk, ppa_list);
+	pblk_queue_resubmit(pblk, c_ctx);
+
+	pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
+	if (c_ctx->nr_padded)
+		pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
+							c_ctx->nr_padded);
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+	mempool_free(recovery, &pblk->rec_pool);
+
+	atomic_dec(&pblk->inflight_io);
+}
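
pblk_submit_rec() above replaces the old in-place recovery write: a failed request now maps the rest of its chunk and marks those sectors invalid, parks the failed buffer range on pblk->resubmit_list, and leaves the actual rewrite to the normal writer thread, which drains that list before consuming new data (see the pblk_submit_write() hunk below). A userspace sketch of that queue-and-drain shape, with a pthread mutex standing in for the spinlock and every name invented:

/* Sketch: record failed ranges on a FIFO list; the writer drains it first. */
#include <pthread.h>
#include <stdlib.h>

struct resub_ctx {
	unsigned int sentry;		/* first failed buffer entry */
	unsigned int nr_valid;		/* number of entries to rewrite */
	struct resub_ctx *next;
};

static struct resub_ctx *resub_head, *resub_tail;
static pthread_mutex_t resub_lock = PTHREAD_MUTEX_INITIALIZER;

/* Completion path: remember the failed range, never rewrite from here. */
static void queue_resubmit(unsigned int sentry, unsigned int nr_valid)
{
	struct resub_ctx *r = malloc(sizeof(*r));

	if (!r)
		return;
	r->sentry = sentry;
	r->nr_valid = nr_valid;
	r->next = NULL;

	pthread_mutex_lock(&resub_lock);
	if (resub_tail)
		resub_tail->next = r;
	else
		resub_head = r;
	resub_tail = r;
	pthread_mutex_unlock(&resub_lock);
}

/* Writer thread: failed ranges take priority over new buffer data. */
static struct resub_ctx *dequeue_resubmit(void)
{
	struct resub_ctx *r;

	pthread_mutex_lock(&resub_lock);
	r = resub_head;
	if (r) {
		resub_head = r->next;
		if (!resub_head)
			resub_tail = NULL;
	}
	pthread_mutex_unlock(&resub_lock);
	return r;
}
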
+
+static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_rec_ctx *recovery;
+
+	recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
+	if (!recovery) {
+		pr_err("pblk: could not allocate recovery work\n");
+		return;
 	}
 
+	recovery->pblk = pblk;
+	recovery->rqd = rqd;
+
 	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
 	queue_work(pblk->close_wq, &recovery->ws_rec);
-
-out:
-	pblk_complete_write(pblk, rqd, c_ctx);
 }
 
 static void pblk_end_io_write(struct nvm_rq *rqd)
@@ -173,8 +255,8 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
 	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 
 	if (rqd->error) {
-		pblk_log_write_err(pblk, rqd);
-		return pblk_end_w_fail(pblk, rqd);
+		pblk_end_w_fail(pblk, rqd);
+		return;
 	}
 #ifdef CONFIG_NVM_DEBUG
 	else
@@ -198,6 +280,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
 		pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+		line->w_err_gc->has_write_err = 1;
 	}
 
 	sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
@@ -266,31 +349,6 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	return 0;
 }
 
-int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			struct pblk_c_ctx *c_ctx)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	unsigned long *lun_bitmap;
-	int ret;
-
-	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-	if (!lun_bitmap)
-		return -ENOMEM;
-
-	c_ctx->lun_bitmap = lun_bitmap;
-
-	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
-	if (ret)
-		return ret;
-
-	pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
-
-	rqd->ppa_status = (u64)0;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
-
-	return ret;
-}
-
 static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 				  unsigned int secs_to_flush)
 {
@@ -339,6 +397,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
 					l_mg->emeta_alloc_type, GFP_KERNEL);
 	if (IS_ERR(bio)) {
+		pr_err("pblk: failed to map emeta io\n");
 		ret = PTR_ERR(bio);
 		goto fail_free_rqd;
 	}
@@ -515,26 +574,54 @@ static int pblk_submit_write(struct pblk *pblk)
 	unsigned int secs_avail, secs_to_sync, secs_to_com;
 	unsigned int secs_to_flush;
 	unsigned long pos;
+	unsigned int resubmit;
 
-	/* If there are no sectors in the cache, flushes (bios without data)
-	 * will be cleared on the cache threads
-	 */
-	secs_avail = pblk_rb_read_count(&pblk->rwb);
-	if (!secs_avail)
-		return 1;
-
-	secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
-	if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
-		return 1;
-
-	secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
-	if (secs_to_sync > pblk->max_write_pgs) {
-		pr_err("pblk: bad buffer sync calculation\n");
-		return 1;
-	}
+	spin_lock(&pblk->resubmit_lock);
+	resubmit = !list_empty(&pblk->resubmit_list);
+	spin_unlock(&pblk->resubmit_lock);
+
+	/* Resubmit failed writes first */
+	if (resubmit) {
+		struct pblk_c_ctx *r_ctx;
+
+		spin_lock(&pblk->resubmit_lock);
+		r_ctx = list_first_entry(&pblk->resubmit_list,
+					struct pblk_c_ctx, list);
+		list_del(&r_ctx->list);
+		spin_unlock(&pblk->resubmit_lock);
+
+		secs_avail = r_ctx->nr_valid;
+		pos = r_ctx->sentry;
+
+		pblk_prepare_resubmit(pblk, pos, secs_avail);
+		secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+				secs_avail);
 
-	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
-	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+		kfree(r_ctx);
+	} else {
+		/* If there are no sectors in the cache,
+		 * flushes (bios without data) will be cleared on
+		 * the cache threads
+		 */
+		secs_avail = pblk_rb_read_count(&pblk->rwb);
+		if (!secs_avail)
+			return 1;
+
+		secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
+		if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+			return 1;
+
+		secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+					secs_to_flush);
+		if (secs_to_sync > pblk->max_write_pgs) {
+			pr_err("pblk: bad buffer sync calculation\n");
+			return 1;
+		}
+
+		secs_to_com = (secs_to_sync > secs_avail) ?
+			secs_avail : secs_to_sync;
+		pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+	}
 
 	bio = bio_alloc(GFP_KERNEL, secs_to_sync);
 

+ 36 - 22
drivers/lightnvm/pblk.h

@@ -89,12 +89,14 @@ struct pblk_sec_meta {
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_GC_NR_LISTS 3
+#define PBLK_GC_NR_LISTS 4
 
 enum {
-	PBLK_RL_HIGH = 1,
-	PBLK_RL_MID = 2,
-	PBLK_RL_LOW = 3,
+	PBLK_RL_OFF = 0,
+	PBLK_RL_WERR = 1,
+	PBLK_RL_HIGH = 2,
+	PBLK_RL_MID = 3,
+	PBLK_RL_LOW = 4
 };
 
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
@@ -128,7 +130,6 @@ struct pblk_pad_rq {
 struct pblk_rec_ctx {
 	struct pblk *pblk;
 	struct nvm_rq *rqd;
-	struct list_head failed;
 	struct work_struct ws_rec;
 };
 
@@ -279,6 +280,8 @@ struct pblk_rl {
 	int rb_user_active;
 	int rb_gc_active;
 
+	atomic_t werr_lines;	/* Number of write error lines that need gc */
+
 	struct timer_list u_timer;
 
 	unsigned long long nr_secs;
@@ -312,6 +315,7 @@ enum {
 	PBLK_LINEGC_MID = 23,
 	PBLK_LINEGC_HIGH = 24,
 	PBLK_LINEGC_FULL = 25,
+	PBLK_LINEGC_WERR = 26
 };
 
 #define PBLK_MAGIC 0x70626c6b /*pblk*/
@@ -413,6 +417,11 @@ struct pblk_smeta {
 	struct line_smeta *buf;		/* smeta buffer in persistent format */
 };
 
+struct pblk_w_err_gc {
+	int has_write_err;
+	__le64 *lba_list;
+};
+
 struct pblk_line {
 	struct pblk *pblk;
 	unsigned int id;		/* Line number corresponds to the
@@ -458,6 +467,8 @@ struct pblk_line {
 
 	struct kref ref;		/* Write buffer L2P references */
 
+	struct pblk_w_err_gc *w_err_gc;	/* Write error gc recovery metadata */
+
 	spinlock_t lock;		/* Necessary for invalid_bitmap only */
 };
 
@@ -489,6 +500,8 @@ struct pblk_line_mgmt {
 	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
 	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
 
+	struct list_head gc_werr_list;  /* Write err recovery list */
+
 	struct list_head gc_full_list;	/* Full lines ready to GC, no valid */
 	struct list_head gc_empty_list;	/* Full lines close, all valid */
 
@@ -664,12 +677,15 @@ struct pblk {
 
 	struct list_head compl_list;
 
-	mempool_t *page_bio_pool;
-	mempool_t *gen_ws_pool;
-	mempool_t *rec_pool;
-	mempool_t *r_rq_pool;
-	mempool_t *w_rq_pool;
-	mempool_t *e_rq_pool;
+	spinlock_t resubmit_lock;	 /* Resubmit list lock */
+	struct list_head resubmit_list; /* Resubmit list for failed writes */
+
+	mempool_t page_bio_pool;
+	mempool_t gen_ws_pool;
+	mempool_t rec_pool;
+	mempool_t r_rq_pool;
+	mempool_t w_rq_pool;
+	mempool_t e_rq_pool;
 
 	struct workqueue_struct *close_wq;
 	struct workqueue_struct *bb_wq;
@@ -713,9 +729,6 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb);
 unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
 				 unsigned int pos, unsigned int nr_entries,
 				 unsigned int count);
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
-				      struct list_head *list,
-				      unsigned int max);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
 			struct ppa_addr ppa, int bio_iter, bool advanced_bio);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
@@ -766,11 +779,13 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk);
 struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_free(struct pblk_line *line);
 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close_ws(struct work_struct *work);
 void pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_flush(struct pblk *pblk);
 void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 		     void (*work)(struct work_struct *), gfp_t gfp_mask,
 		     struct workqueue_struct *wq);
@@ -794,7 +809,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap);
-void pblk_end_io_sync(struct nvm_rq *rqd);
 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
 		       int nr_pages);
 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
@@ -837,23 +851,20 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 int pblk_write_ts(void *data);
 void pblk_write_timer_fn(struct timer_list *t);
 void pblk_write_should_kick(struct pblk *pblk);
+void pblk_write_kick(struct pblk *pblk);
 
 /*
  * pblk read path
  */
-extern struct bio_set *pblk_bio_set;
+extern struct bio_set pblk_bio_set;
 int pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
 /*
  * pblk recovery
  */
-void pblk_submit_rec(struct work_struct *work);
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
 int pblk_recov_pad(struct pblk *pblk);
 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
-			struct pblk_rec_ctx *recovery, u64 *comp_bits,
-			unsigned int comp);
 
 /*
  * pblk gc
@@ -864,7 +875,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 #define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
-void pblk_gc_exit(struct pblk *pblk);
+void pblk_gc_exit(struct pblk *pblk, bool graceful);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
 void pblk_gc_should_kick(struct pblk *pblk);
@@ -894,6 +905,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
 			    bool used);
 int pblk_rl_is_limit(struct pblk_rl *rl);
 
+void pblk_rl_werr_line_in(struct pblk_rl *rl);
+void pblk_rl_werr_line_out(struct pblk_rl *rl);
+
 /*
  * pblk sysfs
  */
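
The pblk header changes above add write-error recovery state: per-line w_err_gc metadata, a gc_werr_list for lines awaiting recovery GC, and a resubmit_lock/resubmit_list pair on struct pblk for failed writes. A minimal, hypothetical sketch of how such a spinlock-protected resubmit list is typically fed and drained; the entry type and helper names are illustrative, not pblk's actual implementation:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/slab.h>
	#include "pblk.h"		/* struct pblk with resubmit_lock/resubmit_list */

	/* Hypothetical bookkeeping entry; not the real pblk context type. */
	struct failed_w_entry {
		struct list_head list;
		unsigned int sentry;
		unsigned int nr_entries;
	};

	/* Completion path: park a failed write for the writer thread to retry. */
	static void park_failed_write(struct pblk *pblk, struct failed_w_entry *e)
	{
		spin_lock(&pblk->resubmit_lock);
		list_add_tail(&e->list, &pblk->resubmit_list);
		spin_unlock(&pblk->resubmit_lock);
	}

	/* Writer thread: drain the list before taking new work from the write buffer. */
	static void drain_failed_writes(struct pblk *pblk)
	{
		struct failed_w_entry *e, *t;
		LIST_HEAD(local);

		spin_lock(&pblk->resubmit_lock);
		list_splice_init(&pblk->resubmit_list, &local);
		spin_unlock(&pblk->resubmit_lock);

		list_for_each_entry_safe(e, t, &local, list) {
			list_del(&e->list);
			/* re-map the entries to a healthy line and re-issue them */
			kfree(e);
		}
	}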

+ 7 - 7
drivers/md/bcache/bcache.h

@@ -269,7 +269,7 @@ struct bcache_device {
 	atomic_t		*stripe_sectors_dirty;
 	unsigned long		*full_dirty_stripes;
 
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 	unsigned		data_csum:1;
 
@@ -345,6 +345,7 @@ struct cached_dev {
 
 	struct keybuf		writeback_keys;
 
+	struct task_struct	*status_update_thread;
 	/*
 	 * Order the write-half of writeback operations strongly in dispatch
 	 * order.  (Maintain LBA order; don't allow reads completing out of
@@ -392,6 +393,7 @@ struct cached_dev {
 #define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
 	atomic_t		io_errors;
 	unsigned		error_limit;
+	unsigned		offline_seconds;
 
 	char			backing_dev_name[BDEVNAME_SIZE];
 };
@@ -528,9 +530,9 @@ struct cache_set {
 	struct closure		sb_write;
 	struct semaphore	sb_write_mutex;
 
-	mempool_t		*search;
-	mempool_t		*bio_meta;
-	struct bio_set		*bio_split;
+	mempool_t		search;
+	mempool_t		bio_meta;
+	struct bio_set		bio_split;
 
 	/* For the btree cache */
 	struct shrinker		shrink;
@@ -655,7 +657,7 @@ struct cache_set {
 	 * A btree node on disk could have too many bsets for an iterator to fit
 	 * on the stack - have to dynamically allocate them
 	 */
-	mempool_t		*fill_iter;
+	mempool_t		fill_iter;
 
 	struct bset_sort_state	sort;
 
@@ -956,8 +958,6 @@ void bch_prio_write(struct cache *);
 void bch_write_bdev_super(struct cached_dev *, struct closure *);
 
 extern struct workqueue_struct *bcache_wq;
-extern const char * const bch_cache_modes[];
-extern const char * const bch_stop_on_failure_modes[];
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
 

+ 4 - 9
drivers/md/bcache/bset.c

@@ -1118,8 +1118,7 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
 
 void bch_bset_sort_state_free(struct bset_sort_state *state)
 {
-	if (state->pool)
-		mempool_destroy(state->pool);
+	mempool_exit(&state->pool);
 }
 
 int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
@@ -1129,11 +1128,7 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
 	state->page_order = page_order;
 	state->crit_factor = int_sqrt(1 << page_order);
 
-	state->pool = mempool_create_page_pool(1, page_order);
-	if (!state->pool)
-		return -ENOMEM;
-
-	return 0;
+	return mempool_init_page_pool(&state->pool, 1, page_order);
 }
 EXPORT_SYMBOL(bch_bset_sort_state_init);
 
@@ -1191,7 +1186,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 
 		BUG_ON(order > state->page_order);
 
-		outp = mempool_alloc(state->pool, GFP_NOIO);
+		outp = mempool_alloc(&state->pool, GFP_NOIO);
 		out = page_address(outp);
 		used_mempool = true;
 		order = state->page_order;
@@ -1220,7 +1215,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 	}
 
 	if (used_mempool)
-		mempool_free(virt_to_page(out), state->pool);
+		mempool_free(virt_to_page(out), &state->pool);
 	else
 		free_pages((unsigned long) out, order);
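
The bset.c hunks move the sort buffer over to an embedded, page-backed mempool. A small sketch of that pool flavour, assuming a fixed page order and illustrative helper names:

	#include <linux/mempool.h>
	#include <linux/mm.h>
	#include <linux/gfp.h>

	static int sort_pool_init(mempool_t *pool, unsigned int page_order)
	{
		/* one reserved element of 2^page_order contiguous pages */
		return mempool_init_page_pool(pool, 1, page_order);
	}

	static void *sort_buf_get(mempool_t *pool)
	{
		/*
		 * With a sleeping mask such as GFP_NOIO, mempool_alloc() waits
		 * for an element instead of returning NULL, so no error path
		 * is needed here.
		 */
		return page_address(mempool_alloc(pool, GFP_NOIO));
	}

	static void sort_buf_put(mempool_t *pool, void *buf)
	{
		mempool_free(virt_to_page(buf), pool);
	}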
 

+ 1 - 1
drivers/md/bcache/bset.h

@@ -347,7 +347,7 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
 /* Sorting */
 
 struct bset_sort_state {
-	mempool_t		*pool;
+	mempool_t		pool;
 
 	unsigned		page_order;
 	unsigned		crit_factor;

+ 2 - 2
drivers/md/bcache/btree.c

@@ -204,7 +204,7 @@ void bch_btree_node_read_done(struct btree *b)
 	struct bset *i = btree_bset_first(b);
 	struct btree_iter *iter;
 
-	iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
+	iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
 	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
@@ -271,7 +271,7 @@ void bch_btree_node_read_done(struct btree *b)
 		bch_bset_init_next(&b->keys, write_block(b),
 				   bset_magic(&b->c->sb));
 out:
-	mempool_free(iter, b->c->fill_iter);
+	mempool_free(iter, &b->c->fill_iter);
 	return;
 err:
 	set_btree_node_io_error(b);

+ 2 - 2
drivers/md/bcache/io.c

@@ -17,12 +17,12 @@
 void bch_bbio_free(struct bio *bio, struct cache_set *c)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
-	mempool_free(b, c->bio_meta);
+	mempool_free(b, &c->bio_meta);
 }
 
 struct bio *bch_bbio_alloc(struct cache_set *c)
 {
-	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
+	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
 	struct bio *bio = &b->bio;
 
 	bio_init(bio, bio->bi_inline_vecs, bucket_pages(c));

+ 9 - 9
drivers/md/bcache/request.c

@@ -213,7 +213,7 @@ static void bch_data_insert_start(struct closure *cl)
 	do {
 		unsigned i;
 		struct bkey *k;
-		struct bio_set *split = op->c->bio_split;
+		struct bio_set *split = &op->c->bio_split;
 
 		/* 1 for the device pointer and 1 for the chksum */
 		if (bch_keylist_realloc(&op->insert_keys,
@@ -548,7 +548,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 
 	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
 				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
-			   GFP_NOIO, s->d->bio_split);
+			   GFP_NOIO, &s->d->bio_split);
 
 	bio_key = &container_of(n, struct bbio, bio)->key;
 	bch_bkey_copy_single_ptr(bio_key, k, ptr);
@@ -707,7 +707,7 @@ static void search_free(struct closure *cl)
 
 	bio_complete(s);
 	closure_debug_destroy(cl);
-	mempool_free(s, s->d->c->search);
+	mempool_free(s, &s->d->c->search);
 }
 
 static inline struct search *search_alloc(struct bio *bio,
@@ -715,7 +715,7 @@ static inline struct search *search_alloc(struct bio *bio,
 {
 	struct search *s;
 
-	s = mempool_alloc(d->c->search, GFP_NOIO);
+	s = mempool_alloc(&d->c->search, GFP_NOIO);
 
 	closure_init(&s->cl, NULL);
 	do_bio_hook(s, bio, request_endio);
@@ -864,7 +864,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	s->cache_missed = 1;
 
 	if (s->cache_miss || s->iop.bypass) {
-		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+		miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
 		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
 		goto out_submit;
 	}
@@ -887,14 +887,14 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 
 	s->iop.replace = true;
 
-	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+	miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
 
 	cache_bio = bio_alloc_bioset(GFP_NOWAIT,
 			DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
-			dc->disk.bio_split);
+			&dc->disk.bio_split);
 	if (!cache_bio)
 		goto out_submit;
 
@@ -1008,7 +1008,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 			struct bio *flush;
 
 			flush = bio_alloc_bioset(GFP_NOIO, 0,
-						 dc->disk.bio_split);
+						 &dc->disk.bio_split);
 			if (!flush) {
 				s->iop.status = BLK_STS_RESOURCE;
 				goto insert_data;
@@ -1021,7 +1021,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 			closure_bio_submit(s->iop.c, flush, cl);
 		}
 	} else {
-		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
+		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
 		/* I/O request sent to backing device */
 		bio->bi_end_io = backing_request_endio;
 		closure_bio_submit(s->iop.c, bio, cl);
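
The request.c hunks only retarget the bio_set arguments to the now-embedded &...->bio_split, but the surrounding calls show the split pattern used during cache lookup. A short sketch of bio_next_split() against an embedded bio_set; the helper name is illustrative:

	#include <linux/bio.h>
	#include <linux/gfp.h>

	/*
	 * bio_next_split() returns @bio itself when the remaining bio already
	 * fits in @sectors; otherwise it hands back a new bio for the front
	 * part (allocated from @bs) and advances @bio past it.
	 */
	static bool handle_one_extent(struct bio *bio, int sectors, struct bio_set *bs)
	{
		struct bio *n = bio_next_split(bio, sectors, GFP_NOIO, bs);

		/* ... map and submit 'n' against this extent ... */

		return n == bio;	/* true: nothing left, lookup is done */
	}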

+ 67 - 42
drivers/md/bcache/super.c

@@ -37,24 +37,6 @@ static const char invalid_uuid[] = {
 	0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
 };
 
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
-const char * const bch_cache_modes[] = {
-	"default",
-	"writethrough",
-	"writeback",
-	"writearound",
-	"none",
-	NULL
-};
-
-/* Default is -1; we skip past it for stop_when_cache_set_failed */
-const char * const bch_stop_on_failure_modes[] = {
-	"default",
-	"auto",
-	"always",
-	NULL
-};
-
 static struct kobject *bcache_kobj;
 struct mutex bch_register_lock;
 LIST_HEAD(bch_cache_sets);
@@ -654,6 +636,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
 		     unsigned int cmd, unsigned long arg)
 {
 	struct bcache_device *d = b->bd_disk->private_data;
+	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+	if (dc->io_disable)
+		return -EIO;
+
 	return d->ioctl(d, mode, cmd, arg);
 }
 
@@ -766,8 +753,7 @@ static void bcache_device_free(struct bcache_device *d)
 		put_disk(d->disk);
 	}
 
-	if (d->bio_split)
-		bioset_free(d->bio_split);
+	bioset_exit(&d->bio_split);
 	kvfree(d->full_dirty_stripes);
 	kvfree(d->stripe_sectors_dirty);
 
@@ -809,9 +795,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 	if (idx < 0)
 		return idx;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
-					   BIOSET_NEED_BVECS |
-					   BIOSET_NEED_RESCUER)) ||
+	if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_device_idx, idx);
 		return -ENOMEM;
@@ -864,6 +849,44 @@ static void calc_cached_dev_sectors(struct cache_set *c)
 	c->cached_dev_sectors = sectors;
 }
 
+#define BACKING_DEV_OFFLINE_TIMEOUT 5
+static int cached_dev_status_update(void *arg)
+{
+	struct cached_dev *dc = arg;
+	struct request_queue *q;
+
+	/*
+	 * If this kthread is being stopped from outside, quit right away.
+	 * dc->io_disable might also be set via the sysfs interface, so
+	 * check it here too.
+	 */
+	while (!kthread_should_stop() && !dc->io_disable) {
+		q = bdev_get_queue(dc->bdev);
+		if (blk_queue_dying(q))
+			dc->offline_seconds++;
+		else
+			dc->offline_seconds = 0;
+
+		if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
+			pr_err("%s: device offline for %d seconds",
+			       dc->backing_dev_name,
+			       BACKING_DEV_OFFLINE_TIMEOUT);
+			pr_err("%s: disable I/O request due to backing "
+			       "device offline", dc->disk.name);
+			dc->io_disable = true;
+			/* let others know earlier that io_disable is true */
+			smp_mb();
+			bcache_device_stop(&dc->disk);
+			break;
+		}
+		schedule_timeout_interruptible(HZ);
+	}
+
+	wait_for_kthread_stop();
+	return 0;
+}
+
+
 void bch_cached_dev_run(struct cached_dev *dc)
 {
 	struct bcache_device *d = &dc->disk;
@@ -906,6 +929,14 @@ void bch_cached_dev_run(struct cached_dev *dc)
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
 		pr_debug("error creating sysfs link");
+
+	dc->status_update_thread = kthread_run(cached_dev_status_update,
+					       dc, "bcache_status_update");
+	if (IS_ERR(dc->status_update_thread)) {
+		pr_warn("failed to create bcache_status_update kthread, "
+			"continue to run without monitoring backing "
+			"device status");
+	}
 }
 
 /*
@@ -1139,6 +1170,8 @@ static void cached_dev_free(struct closure *cl)
 		kthread_stop(dc->writeback_thread);
 	if (dc->writeback_write_wq)
 		destroy_workqueue(dc->writeback_write_wq);
+	if (!IS_ERR_OR_NULL(dc->status_update_thread))
+		kthread_stop(dc->status_update_thread);
 
 	if (atomic_read(&dc->running))
 		bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
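
The status-update monitor added above is a plain kthread: started from bch_cached_dev_run() and stopped in cached_dev_free() only when creation succeeded. A stripped-down sketch of that lifecycle with illustrative names:

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/err.h>

	static int monitor_fn(void *arg)
	{
		while (!kthread_should_stop()) {
			/* poll the backing device roughly once per second */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}

	static struct task_struct *start_monitor(void *arg)
	{
		/* returns ERR_PTR() on failure; the caller may run without the monitor */
		return kthread_run(monitor_fn, arg, "my_status_update");
	}

	static void stop_monitor(struct task_struct *t)
	{
		/* guard against the failed-creation case, as cached_dev_free() does */
		if (!IS_ERR_OR_NULL(t))
			kthread_stop(t);
	}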
@@ -1465,14 +1498,10 @@ static void cache_set_free(struct closure *cl)
 
 	if (c->moving_gc_wq)
 		destroy_workqueue(c->moving_gc_wq);
-	if (c->bio_split)
-		bioset_free(c->bio_split);
-	if (c->fill_iter)
-		mempool_destroy(c->fill_iter);
-	if (c->bio_meta)
-		mempool_destroy(c->bio_meta);
-	if (c->search)
-		mempool_destroy(c->search);
+	bioset_exit(&c->bio_split);
+	mempool_exit(&c->fill_iter);
+	mempool_exit(&c->bio_meta);
+	mempool_exit(&c->search);
 	kfree(c->devices);
 
 	mutex_lock(&bch_register_lock);
@@ -1683,21 +1712,17 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	INIT_LIST_HEAD(&c->btree_cache_freed);
 	INIT_LIST_HEAD(&c->data_buckets);
 
-	c->search = mempool_create_slab_pool(32, bch_search_cache);
-	if (!c->search)
-		goto err;
-
 	iter_size = (sb->bucket_size / sb->block_size + 1) *
 		sizeof(struct btree_iter_set);
 
 	if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) ||
-	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
-				sizeof(struct bbio) + sizeof(struct bio_vec) *
-				bucket_pages(c))) ||
-	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
-					   BIOSET_NEED_BVECS |
-					   BIOSET_NEED_RESCUER)) ||
+	    mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
+	    mempool_init_kmalloc_pool(&c->bio_meta, 2,
+				      sizeof(struct bbio) + sizeof(struct bio_vec) *
+				      bucket_pages(c)) ||
+	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
+	    bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||

Some files were not shown because too many files changed in this diff