|
@@ -23,6 +23,7 @@
|
|
|
#define DEFERRED_SET_SIZE 64
|
|
|
#define MAPPING_POOL_SIZE 1024
|
|
|
#define PRISON_CELLS 1024
|
|
|
+#define COMMIT_PERIOD HZ
|
|
|
|
|
|
/*
|
|
|
* The block size of the device holding pool data must be
|
|
@@ -31,16 +32,6 @@
|
|
|
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
|
|
|
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
|
|
|
|
|
|
-/*
|
|
|
- * The metadata device is currently limited in size. The limitation is
|
|
|
- * checked lower down in dm-space-map-metadata, but we also check it here
|
|
|
- * so we can fail early.
|
|
|
- *
|
|
|
- * We have one block of index, which can hold 255 index entries. Each
|
|
|
- * index entry contains allocation info about 16k metadata blocks.
|
|
|
- */
|
|
|
-#define METADATA_DEV_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
|
|
|
-
|
|
|
/*
|
|
|
* Device id is restricted to 24 bits.
|
|
|
*/
|
|
@@ -72,7 +63,7 @@
|
|
|
* missed out if the io covers the block. (schedule_copy).
|
|
|
*
|
|
|
* iv) insert the new mapping into the origin's btree
|
|
|
- * (process_prepared_mappings). This act of inserting breaks some
|
|
|
+ * (process_prepared_mapping). This act of inserting breaks some
|
|
|
* sharing of btree nodes between the two devices. Breaking sharing only
|
|
|
* effects the btree of that specific device. Btrees for the other
|
|
|
* devices that share the block never change. The btree for the origin
|
|
@@ -124,7 +115,7 @@ struct cell {
|
|
|
struct hlist_node list;
|
|
|
struct bio_prison *prison;
|
|
|
struct cell_key key;
|
|
|
- unsigned count;
|
|
|
+ struct bio *holder;
|
|
|
struct bio_list bios;
|
|
|
};
|
|
|
|
|
@@ -220,54 +211,59 @@ static struct cell *__search_bucket(struct hlist_head *bucket,
|
|
|
* This may block if a new cell needs allocating. You must ensure that
|
|
|
* cells will be unlocked even if the calling thread is blocked.
|
|
|
*
|
|
|
- * Returns the number of entries in the cell prior to the new addition
|
|
|
- * or < 0 on failure.
|
|
|
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
|
|
|
*/
|
|
|
static int bio_detain(struct bio_prison *prison, struct cell_key *key,
|
|
|
struct bio *inmate, struct cell **ref)
|
|
|
{
|
|
|
- int r;
|
|
|
+ int r = 1;
|
|
|
unsigned long flags;
|
|
|
uint32_t hash = hash_key(prison, key);
|
|
|
- struct cell *uninitialized_var(cell), *cell2 = NULL;
|
|
|
+ struct cell *cell, *cell2;
|
|
|
|
|
|
BUG_ON(hash > prison->nr_buckets);
|
|
|
|
|
|
spin_lock_irqsave(&prison->lock, flags);
|
|
|
+
|
|
|
cell = __search_bucket(prison->cells + hash, key);
|
|
|
+ if (cell) {
|
|
|
+ bio_list_add(&cell->bios, inmate);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
|
|
|
- if (!cell) {
|
|
|
- /*
|
|
|
- * Allocate a new cell
|
|
|
- */
|
|
|
- spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
- cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
|
|
|
- spin_lock_irqsave(&prison->lock, flags);
|
|
|
+ /*
|
|
|
+ * Allocate a new cell
|
|
|
+ */
|
|
|
+ spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
+ cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
|
|
|
+ spin_lock_irqsave(&prison->lock, flags);
|
|
|
|
|
|
- /*
|
|
|
- * We've been unlocked, so we have to double check that
|
|
|
- * nobody else has inserted this cell in the meantime.
|
|
|
- */
|
|
|
- cell = __search_bucket(prison->cells + hash, key);
|
|
|
+ /*
|
|
|
+ * We've been unlocked, so we have to double check that
|
|
|
+ * nobody else has inserted this cell in the meantime.
|
|
|
+ */
|
|
|
+ cell = __search_bucket(prison->cells + hash, key);
|
|
|
+ if (cell) {
|
|
|
+ mempool_free(cell2, prison->cell_pool);
|
|
|
+ bio_list_add(&cell->bios, inmate);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
|
|
|
- if (!cell) {
|
|
|
- cell = cell2;
|
|
|
- cell2 = NULL;
|
|
|
+ /*
|
|
|
+ * Use new cell.
|
|
|
+ */
|
|
|
+ cell = cell2;
|
|
|
|
|
|
- cell->prison = prison;
|
|
|
- memcpy(&cell->key, key, sizeof(cell->key));
|
|
|
- cell->count = 0;
|
|
|
- bio_list_init(&cell->bios);
|
|
|
- hlist_add_head(&cell->list, prison->cells + hash);
|
|
|
- }
|
|
|
- }
|
|
|
+ cell->prison = prison;
|
|
|
+ memcpy(&cell->key, key, sizeof(cell->key));
|
|
|
+ cell->holder = inmate;
|
|
|
+ bio_list_init(&cell->bios);
|
|
|
+ hlist_add_head(&cell->list, prison->cells + hash);
|
|
|
|
|
|
- r = cell->count++;
|
|
|
- bio_list_add(&cell->bios, inmate);
|
|
|
- spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
+ r = 0;
|
|
|
|
|
|
- if (cell2)
|
|
|
- mempool_free(cell2, prison->cell_pool);
|
|
|
+out:
|
|
|
+ spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
|
|
|
*ref = cell;
|
|
|
|
|
@@ -283,8 +279,8 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates)
|
|
|
|
|
|
hlist_del(&cell->list);
|
|
|
|
|
|
- if (inmates)
|
|
|
- bio_list_merge(inmates, &cell->bios);
|
|
|
+ bio_list_add(inmates, cell->holder);
|
|
|
+ bio_list_merge(inmates, &cell->bios);
|
|
|
|
|
|
mempool_free(cell, prison->cell_pool);
|
|
|
}
|
|
@@ -305,22 +301,44 @@ static void cell_release(struct cell *cell, struct bio_list *bios)
|
|
|
* bio may be in the cell. This function releases the cell, and also does
|
|
|
* a sanity check.
|
|
|
*/
|
|
|
+static void __cell_release_singleton(struct cell *cell, struct bio *bio)
|
|
|
+{
|
|
|
+ hlist_del(&cell->list);
|
|
|
+ BUG_ON(cell->holder != bio);
|
|
|
+ BUG_ON(!bio_list_empty(&cell->bios));
|
|
|
+}
|
|
|
+
|
|
|
static void cell_release_singleton(struct cell *cell, struct bio *bio)
|
|
|
{
|
|
|
- struct bio_prison *prison = cell->prison;
|
|
|
- struct bio_list bios;
|
|
|
- struct bio *b;
|
|
|
unsigned long flags;
|
|
|
-
|
|
|
- bio_list_init(&bios);
|
|
|
+ struct bio_prison *prison = cell->prison;
|
|
|
|
|
|
spin_lock_irqsave(&prison->lock, flags);
|
|
|
- __cell_release(cell, &bios);
|
|
|
+ __cell_release_singleton(cell, bio);
|
|
|
spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Sometimes we don't want the holder, just the additional bios.
|
|
|
+ */
|
|
|
+static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
|
|
|
+{
|
|
|
+ struct bio_prison *prison = cell->prison;
|
|
|
+
|
|
|
+ hlist_del(&cell->list);
|
|
|
+ bio_list_merge(inmates, &cell->bios);
|
|
|
|
|
|
- b = bio_list_pop(&bios);
|
|
|
- BUG_ON(b != bio);
|
|
|
- BUG_ON(!bio_list_empty(&bios));
|
|
|
+ mempool_free(cell, prison->cell_pool);
|
|
|
+}
|
|
|
+
|
|
|
+static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
|
|
|
+{
|
|
|
+ unsigned long flags;
|
|
|
+ struct bio_prison *prison = cell->prison;
|
|
|
+
|
|
|
+ spin_lock_irqsave(&prison->lock, flags);
|
|
|
+ __cell_release_no_holder(cell, inmates);
|
|
|
+ spin_unlock_irqrestore(&prison->lock, flags);
|
|
|
}
|
|
|
|
|
|
static void cell_error(struct cell *cell)
|
|
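The cell changes above replace the old reference count with an explicit holder: the first bio to detain a cell owns it (bio_detain() now returns 0), every later bio for the same key is queued behind it (returns 1), and cell_release_no_holder() hands back only the queued bios. A minimal user-space sketch of that convention follows; the names are invented and a single cell stands in for the kernel's hashed prison.

#include <stdio.h>

#define MAX_WAITERS 8

struct mock_cell {
	int in_use;
	unsigned long long key;			/* stands in for struct cell_key */
	const char *holder;			/* first bio to arrive */
	const char *waiters[MAX_WAITERS];	/* later bios for the same block */
	int nr_waiters;
};

/* Mirrors bio_detain(): 0 = caller became the holder, 1 = already held. */
static int detain(struct mock_cell *cell, unsigned long long key, const char *bio)
{
	if (cell->in_use && cell->key == key) {
		if (cell->nr_waiters < MAX_WAITERS)
			cell->waiters[cell->nr_waiters++] = bio;
		return 1;
	}
	cell->in_use = 1;
	cell->key = key;
	cell->holder = bio;
	cell->nr_waiters = 0;
	return 0;
}

/* Mirrors cell_release_no_holder(): hand back the queued bios only. */
static void release_no_holder(struct mock_cell *cell)
{
	for (int i = 0; i < cell->nr_waiters; i++)
		printf("deferring %s\n", cell->waiters[i]);
	cell->in_use = 0;
}

int main(void)
{
	struct mock_cell cell = { 0 };

	printf("bio A -> %d (new holder)\n", detain(&cell, 42, "bio A"));
	printf("bio B -> %d (queued)\n", detain(&cell, 42, "bio B"));
	release_no_holder(&cell);	/* the holder, bio A, is dealt with separately */
	return 0;
}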
@@ -471,6 +489,13 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
|
|
|
* devices.
|
|
|
*/
|
|
|
struct new_mapping;
|
|
|
+
|
|
|
+struct pool_features {
|
|
|
+ unsigned zero_new_blocks:1;
|
|
|
+ unsigned discard_enabled:1;
|
|
|
+ unsigned discard_passdown:1;
|
|
|
+};
|
|
|
+
|
|
|
struct pool {
|
|
|
struct list_head list;
|
|
|
struct dm_target *ti; /* Only set if a pool target is bound */
|
|
@@ -484,7 +509,7 @@ struct pool {
|
|
|
dm_block_t offset_mask;
|
|
|
dm_block_t low_water_blocks;
|
|
|
|
|
|
- unsigned zero_new_blocks:1;
|
|
|
+ struct pool_features pf;
|
|
|
unsigned low_water_triggered:1; /* A dm event has been sent */
|
|
|
unsigned no_free_space:1; /* A -ENOSPC warning has been issued */
|
|
|
|
|
@@ -493,17 +518,21 @@ struct pool {
|
|
|
|
|
|
struct workqueue_struct *wq;
|
|
|
struct work_struct worker;
|
|
|
+ struct delayed_work waker;
|
|
|
|
|
|
unsigned ref_count;
|
|
|
+ unsigned long last_commit_jiffies;
|
|
|
|
|
|
spinlock_t lock;
|
|
|
struct bio_list deferred_bios;
|
|
|
struct bio_list deferred_flush_bios;
|
|
|
struct list_head prepared_mappings;
|
|
|
+ struct list_head prepared_discards;
|
|
|
|
|
|
struct bio_list retry_on_resume_list;
|
|
|
|
|
|
- struct deferred_set ds; /* FIXME: move to thin_c */
|
|
|
+ struct deferred_set shared_read_ds;
|
|
|
+ struct deferred_set all_io_ds;
|
|
|
|
|
|
struct new_mapping *next_mapping;
|
|
|
mempool_t *mapping_pool;
|
|
@@ -521,7 +550,7 @@ struct pool_c {
|
|
|
struct dm_target_callbacks callbacks;
|
|
|
|
|
|
dm_block_t low_water_blocks;
|
|
|
- unsigned zero_new_blocks:1;
|
|
|
+ struct pool_features pf;
|
|
|
};
|
|
|
|
|
|
/*
|
|
@@ -529,6 +558,7 @@ struct pool_c {
|
|
|
*/
|
|
|
struct thin_c {
|
|
|
struct dm_dev *pool_dev;
|
|
|
+ struct dm_dev *origin_dev;
|
|
|
dm_thin_id dev_id;
|
|
|
|
|
|
struct pool *pool;
|
|
@@ -597,6 +627,13 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev
|
|
|
|
|
|
/*----------------------------------------------------------------*/
|
|
|
|
|
|
+struct endio_hook {
|
|
|
+ struct thin_c *tc;
|
|
|
+ struct deferred_entry *shared_read_entry;
|
|
|
+ struct deferred_entry *all_io_entry;
|
|
|
+ struct new_mapping *overwrite_mapping;
|
|
|
+};
|
|
|
+
|
|
|
static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
|
|
|
{
|
|
|
struct bio *bio;
|
|
@@ -607,7 +644,8 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
|
|
|
bio_list_init(master);
|
|
|
|
|
|
while ((bio = bio_list_pop(&bios))) {
|
|
|
- if (dm_get_mapinfo(bio)->ptr == tc)
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ if (h->tc == tc)
|
|
|
bio_endio(bio, DM_ENDIO_REQUEUE);
|
|
|
else
|
|
|
bio_list_add(master, bio);
|
|
@@ -646,14 +684,16 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
|
|
|
(bio->bi_sector & pool->offset_mask);
|
|
|
}
|
|
|
|
|
|
-static void remap_and_issue(struct thin_c *tc, struct bio *bio,
|
|
|
- dm_block_t block)
|
|
|
+static void remap_to_origin(struct thin_c *tc, struct bio *bio)
|
|
|
+{
|
|
|
+ bio->bi_bdev = tc->origin_dev->bdev;
|
|
|
+}
|
|
|
+
|
|
|
+static void issue(struct thin_c *tc, struct bio *bio)
|
|
|
{
|
|
|
struct pool *pool = tc->pool;
|
|
|
unsigned long flags;
|
|
|
|
|
|
- remap(tc, bio, block);
|
|
|
-
|
|
|
/*
|
|
|
* Batch together any FUA/FLUSH bios we find and then issue
|
|
|
* a single commit for them in process_deferred_bios().
|
|
@@ -666,6 +706,19 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
|
|
|
generic_make_request(bio);
|
|
|
}
|
|
|
|
|
|
+static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
|
|
|
+{
|
|
|
+ remap_to_origin(tc, bio);
|
|
|
+ issue(tc, bio);
|
|
|
+}
|
|
|
+
|
|
|
+static void remap_and_issue(struct thin_c *tc, struct bio *bio,
|
|
|
+ dm_block_t block)
|
|
|
+{
|
|
|
+ remap(tc, bio, block);
|
|
|
+ issue(tc, bio);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* wake_worker() is used when new work is queued and when pool_resume is
|
|
|
* ready to continue deferred IO processing.
|
|
@@ -680,21 +733,17 @@ static void wake_worker(struct pool *pool)
|
|
|
/*
|
|
|
* Bio endio functions.
|
|
|
*/
|
|
|
-struct endio_hook {
|
|
|
- struct thin_c *tc;
|
|
|
- bio_end_io_t *saved_bi_end_io;
|
|
|
- struct deferred_entry *entry;
|
|
|
-};
|
|
|
-
|
|
|
struct new_mapping {
|
|
|
struct list_head list;
|
|
|
|
|
|
- int prepared;
|
|
|
+ unsigned quiesced:1;
|
|
|
+ unsigned prepared:1;
|
|
|
+ unsigned pass_discard:1;
|
|
|
|
|
|
struct thin_c *tc;
|
|
|
dm_block_t virt_block;
|
|
|
dm_block_t data_block;
|
|
|
- struct cell *cell;
|
|
|
+ struct cell *cell, *cell2;
|
|
|
int err;
|
|
|
|
|
|
/*
|
|
@@ -711,7 +760,7 @@ static void __maybe_add_mapping(struct new_mapping *m)
|
|
|
{
|
|
|
struct pool *pool = m->tc->pool;
|
|
|
|
|
|
- if (list_empty(&m->list) && m->prepared) {
|
|
|
+ if (m->quiesced && m->prepared) {
|
|
|
list_add(&m->list, &pool->prepared_mappings);
|
|
|
wake_worker(pool);
|
|
|
}
|
|
@@ -734,7 +783,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
|
|
|
static void overwrite_endio(struct bio *bio, int err)
|
|
|
{
|
|
|
unsigned long flags;
|
|
|
- struct new_mapping *m = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct new_mapping *m = h->overwrite_mapping;
|
|
|
struct pool *pool = m->tc->pool;
|
|
|
|
|
|
m->err = err;
|
|
@@ -745,31 +795,6 @@ static void overwrite_endio(struct bio *bio, int err)
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
}
|
|
|
|
|
|
-static void shared_read_endio(struct bio *bio, int err)
|
|
|
-{
|
|
|
- struct list_head mappings;
|
|
|
- struct new_mapping *m, *tmp;
|
|
|
- struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
- unsigned long flags;
|
|
|
- struct pool *pool = h->tc->pool;
|
|
|
-
|
|
|
- bio->bi_end_io = h->saved_bi_end_io;
|
|
|
- bio_endio(bio, err);
|
|
|
-
|
|
|
- INIT_LIST_HEAD(&mappings);
|
|
|
- ds_dec(h->entry, &mappings);
|
|
|
-
|
|
|
- spin_lock_irqsave(&pool->lock, flags);
|
|
|
- list_for_each_entry_safe(m, tmp, &mappings, list) {
|
|
|
- list_del(&m->list);
|
|
|
- INIT_LIST_HEAD(&m->list);
|
|
|
- __maybe_add_mapping(m);
|
|
|
- }
|
|
|
- spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
-
|
|
|
- mempool_free(h, pool->endio_hook_pool);
|
|
|
-}
|
|
|
-
|
|
|
/*----------------------------------------------------------------*/
|
|
|
|
|
|
/*
|
|
@@ -800,21 +825,16 @@ static void cell_defer(struct thin_c *tc, struct cell *cell,
|
|
|
* Same as cell_defer above, except it omits one particular detainee,
|
|
|
* a write bio that covers the block and has already been processed.
|
|
|
*/
|
|
|
-static void cell_defer_except(struct thin_c *tc, struct cell *cell,
|
|
|
- struct bio *exception)
|
|
|
+static void cell_defer_except(struct thin_c *tc, struct cell *cell)
|
|
|
{
|
|
|
struct bio_list bios;
|
|
|
- struct bio *bio;
|
|
|
struct pool *pool = tc->pool;
|
|
|
unsigned long flags;
|
|
|
|
|
|
bio_list_init(&bios);
|
|
|
- cell_release(cell, &bios);
|
|
|
|
|
|
spin_lock_irqsave(&pool->lock, flags);
|
|
|
- while ((bio = bio_list_pop(&bios)))
|
|
|
- if (bio != exception)
|
|
|
- bio_list_add(&pool->deferred_bios, bio);
|
|
|
+ cell_release_no_holder(cell, &pool->deferred_bios);
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
|
|
|
wake_worker(pool);
|
|
@@ -854,7 +874,7 @@ static void process_prepared_mapping(struct new_mapping *m)
|
|
|
* the bios in the cell.
|
|
|
*/
|
|
|
if (bio) {
|
|
|
- cell_defer_except(tc, m->cell, bio);
|
|
|
+ cell_defer_except(tc, m->cell);
|
|
|
bio_endio(bio, 0);
|
|
|
} else
|
|
|
cell_defer(tc, m->cell, m->data_block);
|
|
@@ -863,7 +883,30 @@ static void process_prepared_mapping(struct new_mapping *m)
|
|
|
mempool_free(m, tc->pool->mapping_pool);
|
|
|
}
|
|
|
|
|
|
-static void process_prepared_mappings(struct pool *pool)
|
|
|
+static void process_prepared_discard(struct new_mapping *m)
|
|
|
+{
|
|
|
+ int r;
|
|
|
+ struct thin_c *tc = m->tc;
|
|
|
+
|
|
|
+ r = dm_thin_remove_block(tc->td, m->virt_block);
|
|
|
+ if (r)
|
|
|
+ DMERR("dm_thin_remove_block() failed");
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Pass the discard down to the underlying device?
|
|
|
+ */
|
|
|
+ if (m->pass_discard)
|
|
|
+ remap_and_issue(tc, m->bio, m->data_block);
|
|
|
+ else
|
|
|
+ bio_endio(m->bio, 0);
|
|
|
+
|
|
|
+ cell_defer_except(tc, m->cell);
|
|
|
+ cell_defer_except(tc, m->cell2);
|
|
|
+ mempool_free(m, tc->pool->mapping_pool);
|
|
|
+}
|
|
|
+
|
|
|
+static void process_prepared(struct pool *pool, struct list_head *head,
|
|
|
+ void (*fn)(struct new_mapping *))
|
|
|
{
|
|
|
unsigned long flags;
|
|
|
struct list_head maps;
|
|
@@ -871,21 +914,27 @@ static void process_prepared_mappings(struct pool *pool)
|
|
|
|
|
|
INIT_LIST_HEAD(&maps);
|
|
|
spin_lock_irqsave(&pool->lock, flags);
|
|
|
- list_splice_init(&pool->prepared_mappings, &maps);
|
|
|
+ list_splice_init(head, &maps);
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
|
|
|
list_for_each_entry_safe(m, tmp, &maps, list)
|
|
|
- process_prepared_mapping(m);
|
|
|
+ fn(m);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
* Deferred bio jobs.
|
|
|
*/
|
|
|
-static int io_overwrites_block(struct pool *pool, struct bio *bio)
|
|
|
+static int io_overlaps_block(struct pool *pool, struct bio *bio)
|
|
|
{
|
|
|
- return ((bio_data_dir(bio) == WRITE) &&
|
|
|
- !(bio->bi_sector & pool->offset_mask)) &&
|
|
|
+ return !(bio->bi_sector & pool->offset_mask) &&
|
|
|
(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+static int io_overwrites_block(struct pool *pool, struct bio *bio)
|
|
|
+{
|
|
|
+ return (bio_data_dir(bio) == WRITE) &&
|
|
|
+ io_overlaps_block(pool, bio);
|
|
|
}
|
|
|
|
|
|
static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
|
|
@@ -917,7 +966,8 @@ static struct new_mapping *get_next_mapping(struct pool *pool)
|
|
|
}
|
|
|
|
|
|
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
- dm_block_t data_origin, dm_block_t data_dest,
|
|
|
+ struct dm_dev *origin, dm_block_t data_origin,
|
|
|
+ dm_block_t data_dest,
|
|
|
struct cell *cell, struct bio *bio)
|
|
|
{
|
|
|
int r;
|
|
@@ -925,6 +975,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
struct new_mapping *m = get_next_mapping(pool);
|
|
|
|
|
|
INIT_LIST_HEAD(&m->list);
|
|
|
+ m->quiesced = 0;
|
|
|
m->prepared = 0;
|
|
|
m->tc = tc;
|
|
|
m->virt_block = virt_block;
|
|
@@ -933,7 +984,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
m->err = 0;
|
|
|
m->bio = NULL;
|
|
|
|
|
|
- ds_add_work(&pool->ds, &m->list);
|
|
|
+ if (!ds_add_work(&pool->shared_read_ds, &m->list))
|
|
|
+ m->quiesced = 1;
|
|
|
|
|
|
/*
|
|
|
* IO to pool_dev remaps to the pool target's data_dev.
|
|
@@ -942,14 +994,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
* bio immediately. Otherwise we use kcopyd to clone the data first.
|
|
|
*/
|
|
|
if (io_overwrites_block(pool, bio)) {
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ h->overwrite_mapping = m;
|
|
|
m->bio = bio;
|
|
|
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
|
|
|
- dm_get_mapinfo(bio)->ptr = m;
|
|
|
remap_and_issue(tc, bio, data_dest);
|
|
|
} else {
|
|
|
struct dm_io_region from, to;
|
|
|
|
|
|
- from.bdev = tc->pool_dev->bdev;
|
|
|
+ from.bdev = origin->bdev;
|
|
|
from.sector = data_origin * pool->sectors_per_block;
|
|
|
from.count = pool->sectors_per_block;
|
|
|
|
|
@@ -967,6 +1020,22 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
+ dm_block_t data_origin, dm_block_t data_dest,
|
|
|
+ struct cell *cell, struct bio *bio)
|
|
|
+{
|
|
|
+ schedule_copy(tc, virt_block, tc->pool_dev,
|
|
|
+ data_origin, data_dest, cell, bio);
|
|
|
+}
|
|
|
+
|
|
|
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
|
|
|
+ dm_block_t data_dest,
|
|
|
+ struct cell *cell, struct bio *bio)
|
|
|
+{
|
|
|
+ schedule_copy(tc, virt_block, tc->origin_dev,
|
|
|
+ virt_block, data_dest, cell, bio);
|
|
|
+}
|
|
|
+
|
|
|
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
|
|
|
dm_block_t data_block, struct cell *cell,
|
|
|
struct bio *bio)
|
|
@@ -975,6 +1044,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
|
|
|
struct new_mapping *m = get_next_mapping(pool);
|
|
|
|
|
|
INIT_LIST_HEAD(&m->list);
|
|
|
+ m->quiesced = 1;
|
|
|
m->prepared = 0;
|
|
|
m->tc = tc;
|
|
|
m->virt_block = virt_block;
|
|
@@ -988,13 +1058,14 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
|
|
|
* zeroing pre-existing data, we can issue the bio immediately.
|
|
|
* Otherwise we use kcopyd to zero the data first.
|
|
|
*/
|
|
|
- if (!pool->zero_new_blocks)
|
|
|
+ if (!pool->pf.zero_new_blocks)
|
|
|
process_prepared_mapping(m);
|
|
|
|
|
|
else if (io_overwrites_block(pool, bio)) {
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ h->overwrite_mapping = m;
|
|
|
m->bio = bio;
|
|
|
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
|
|
|
- dm_get_mapinfo(bio)->ptr = m;
|
|
|
remap_and_issue(tc, bio, data_block);
|
|
|
|
|
|
} else {
|
|
@@ -1081,7 +1152,8 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
|
|
*/
|
|
|
static void retry_on_resume(struct bio *bio)
|
|
|
{
|
|
|
- struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct thin_c *tc = h->tc;
|
|
|
struct pool *pool = tc->pool;
|
|
|
unsigned long flags;
|
|
|
|
|
@@ -1102,6 +1174,86 @@ static void no_space(struct cell *cell)
|
|
|
retry_on_resume(bio);
|
|
|
}
|
|
|
|
|
|
+static void process_discard(struct thin_c *tc, struct bio *bio)
|
|
|
+{
|
|
|
+ int r;
|
|
|
+ struct pool *pool = tc->pool;
|
|
|
+ struct cell *cell, *cell2;
|
|
|
+ struct cell_key key, key2;
|
|
|
+ dm_block_t block = get_bio_block(tc, bio);
|
|
|
+ struct dm_thin_lookup_result lookup_result;
|
|
|
+ struct new_mapping *m;
|
|
|
+
|
|
|
+ build_virtual_key(tc->td, block, &key);
|
|
|
+ if (bio_detain(tc->pool->prison, &key, bio, &cell))
|
|
|
+ return;
|
|
|
+
|
|
|
+ r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
|
|
|
+ switch (r) {
|
|
|
+ case 0:
|
|
|
+ /*
|
|
|
+ * Check nobody is fiddling with this pool block. This can
|
|
|
+ * happen if someone's in the process of breaking sharing
|
|
|
+ * on this block.
|
|
|
+ */
|
|
|
+ build_data_key(tc->td, lookup_result.block, &key2);
|
|
|
+ if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (io_overlaps_block(pool, bio)) {
|
|
|
+ /*
|
|
|
+ * IO may still be going to the destination block. We must
|
|
|
+ * quiesce before we can do the removal.
|
|
|
+ */
|
|
|
+ m = get_next_mapping(pool);
|
|
|
+ m->tc = tc;
|
|
|
+ m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown;
|
|
|
+ m->virt_block = block;
|
|
|
+ m->data_block = lookup_result.block;
|
|
|
+ m->cell = cell;
|
|
|
+ m->cell2 = cell2;
|
|
|
+ m->err = 0;
|
|
|
+ m->bio = bio;
|
|
|
+
|
|
|
+ if (!ds_add_work(&pool->all_io_ds, &m->list)) {
|
|
|
+ list_add(&m->list, &pool->prepared_discards);
|
|
|
+ wake_worker(pool);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * This path is hit if people are ignoring
|
|
|
+ * limits->discard_granularity. It ignores any
|
|
|
+ * part of the discard that is in a subsequent
|
|
|
+ * block.
|
|
|
+ */
|
|
|
+ sector_t offset = bio->bi_sector - (block << pool->block_shift);
|
|
|
+ unsigned remaining = (pool->sectors_per_block - offset) << 9;
|
|
|
+ bio->bi_size = min(bio->bi_size, remaining);
|
|
|
+
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
+ cell_release_singleton(cell2, bio);
|
|
|
+ remap_and_issue(tc, bio, lookup_result.block);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+
|
|
|
+ case -ENODATA:
|
|
|
+ /*
|
|
|
+ * It isn't provisioned, just forget it.
|
|
|
+ */
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
+ bio_endio(bio, 0);
|
|
|
+ break;
|
|
|
+
|
|
|
+ default:
|
|
|
+ DMERR("discard: find block unexpectedly returned %d", r);
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
+ bio_io_error(bio);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+}
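The else branch above trims a misaligned discard so it never spills into the next block. A small stand-alone check of that arithmetic, with an arbitrarily chosen 64KiB block size (128 sectors, block_shift of 7):

#include <stdio.h>

int main(void)
{
	unsigned long sectors_per_block = 128, block_shift = 7;
	unsigned long long bi_sector = 200;		/* start of the discard */
	unsigned bi_size = 100 << 9;			/* 100 sectors, in bytes */

	unsigned long long block = bi_sector >> block_shift;			/* = 1 */
	unsigned long long offset = bi_sector - (block << block_shift);	/* = 72 */
	unsigned remaining = (sectors_per_block - offset) << 9;		/* = 28672 */

	if (bi_size > remaining)
		bi_size = remaining;

	/* The discard now stops at the end of block 1, i.e. sector 256. */
	printf("truncated to %u sectors\n", bi_size >> 9);
	return 0;
}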
|
|
|
+
|
|
|
static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
|
|
|
struct cell_key *key,
|
|
|
struct dm_thin_lookup_result *lookup_result,
|
|
@@ -1113,8 +1265,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
|
|
|
r = alloc_data_block(tc, &data_block);
|
|
|
switch (r) {
|
|
|
case 0:
|
|
|
- schedule_copy(tc, block, lookup_result->block,
|
|
|
- data_block, cell, bio);
|
|
|
+ schedule_internal_copy(tc, block, lookup_result->block,
|
|
|
+ data_block, cell, bio);
|
|
|
break;
|
|
|
|
|
|
case -ENOSPC:
|
|
@@ -1147,13 +1299,9 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
|
|
|
if (bio_data_dir(bio) == WRITE)
|
|
|
break_sharing(tc, bio, block, &key, lookup_result, cell);
|
|
|
else {
|
|
|
- struct endio_hook *h;
|
|
|
- h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
|
|
|
- h->tc = tc;
|
|
|
- h->entry = ds_inc(&pool->ds);
|
|
|
- save_and_set_endio(bio, &h->saved_bi_end_io, shared_read_endio);
|
|
|
- dm_get_mapinfo(bio)->ptr = h;
|
|
|
+ h->shared_read_entry = ds_inc(&pool->shared_read_ds);
|
|
|
|
|
|
cell_release_singleton(cell, bio);
|
|
|
remap_and_issue(tc, bio, lookup_result->block);
|
|
@@ -1188,7 +1336,10 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
|
|
|
r = alloc_data_block(tc, &data_block);
|
|
|
switch (r) {
|
|
|
case 0:
|
|
|
- schedule_zero(tc, block, data_block, cell, bio);
|
|
|
+ if (tc->origin_dev)
|
|
|
+ schedule_external_copy(tc, block, data_block, cell, bio);
|
|
|
+ else
|
|
|
+ schedule_zero(tc, block, data_block, cell, bio);
|
|
|
break;
|
|
|
|
|
|
case -ENOSPC:
|
|
@@ -1239,16 +1390,27 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
|
|
|
break;
|
|
|
|
|
|
case -ENODATA:
|
|
|
- provision_block(tc, bio, block, cell);
|
|
|
+ if (bio_data_dir(bio) == READ && tc->origin_dev) {
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
+ remap_to_origin_and_issue(tc, bio);
|
|
|
+ } else
|
|
|
+ provision_block(tc, bio, block, cell);
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
DMERR("dm_thin_find_block() failed, error = %d", r);
|
|
|
+ cell_release_singleton(cell, bio);
|
|
|
bio_io_error(bio);
|
|
|
break;
|
|
|
}
|
|
|
}
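With an external origin configured, the -ENODATA handling above routes a read of an unprovisioned block straight to the origin, while a write still provisions a block, copying from the origin instead of zeroing. Condensed into a user-space sketch (names invented, mirrors the process_bio()/provision_block() paths):

#include <stdio.h>

enum action { REMAP_TO_ORIGIN, COPY_FROM_ORIGIN, ZERO_NEW_BLOCK };

static enum action unprovisioned_action(int is_write, int has_origin)
{
	if (!is_write && has_origin)
		return REMAP_TO_ORIGIN;		/* remap_to_origin_and_issue() */
	if (has_origin)
		return COPY_FROM_ORIGIN;	/* schedule_external_copy() */
	return ZERO_NEW_BLOCK;			/* schedule_zero() */
}

int main(void)
{
	printf("%d %d %d\n",
	       unprovisioned_action(0, 1),	/* read,  origin present -> 0 */
	       unprovisioned_action(1, 1),	/* write, origin present -> 1 */
	       unprovisioned_action(1, 0));	/* write, no origin      -> 2 */
	return 0;
}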
|
|
|
|
|
|
+static int need_commit_due_to_time(struct pool *pool)
|
|
|
+{
|
|
|
+ return jiffies < pool->last_commit_jiffies ||
|
|
|
+ jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
|
|
|
+}
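need_commit_due_to_time() forces a metadata commit roughly once per COMMIT_PERIOD (HZ, so about a second) even when no FLUSH/FUA bios arrive; the first clause also catches a wrapped jiffies counter. A quick user-space rendering of the same test, with HZ assumed to be 250 purely for the example:

#include <stdio.h>

#define HZ 250UL
#define COMMIT_PERIOD HZ

static int need_commit(unsigned long now, unsigned long last_commit)
{
	/* Same test as need_commit_due_to_time(); also fires if the
	 * counter has wrapped back past last_commit. */
	return now < last_commit || now > last_commit + COMMIT_PERIOD;
}

int main(void)
{
	unsigned long last = 1000;

	printf("%d\n", need_commit(1100, last));	/* 0: within a period */
	printf("%d\n", need_commit(1300, last));	/* 1: more than HZ later */
	printf("%d\n", need_commit(5, last));		/* 1: counter wrapped */
	return 0;
}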
|
|
|
+
|
|
|
static void process_deferred_bios(struct pool *pool)
|
|
|
{
|
|
|
unsigned long flags;
|
|
@@ -1264,7 +1426,9 @@ static void process_deferred_bios(struct pool *pool)
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
|
|
|
while ((bio = bio_list_pop(&bios))) {
|
|
|
- struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
|
|
|
+ struct thin_c *tc = h->tc;
|
|
|
+
|
|
|
/*
|
|
|
* If we've got no free new_mapping structs, and processing
|
|
|
* this bio might require one, we pause until there are some
|
|
@@ -1277,7 +1441,11 @@ static void process_deferred_bios(struct pool *pool)
|
|
|
|
|
|
break;
|
|
|
}
|
|
|
- process_bio(tc, bio);
|
|
|
+
|
|
|
+ if (bio->bi_rw & REQ_DISCARD)
|
|
|
+ process_discard(tc, bio);
|
|
|
+ else
|
|
|
+ process_bio(tc, bio);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1290,7 +1458,7 @@ static void process_deferred_bios(struct pool *pool)
|
|
|
bio_list_init(&pool->deferred_flush_bios);
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
|
|
|
- if (bio_list_empty(&bios))
|
|
|
+ if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
|
|
|
return;
|
|
|
|
|
|
r = dm_pool_commit_metadata(pool->pmd);
|
|
@@ -1301,6 +1469,7 @@ static void process_deferred_bios(struct pool *pool)
|
|
|
bio_io_error(bio);
|
|
|
return;
|
|
|
}
|
|
|
+ pool->last_commit_jiffies = jiffies;
|
|
|
|
|
|
while ((bio = bio_list_pop(&bios)))
|
|
|
generic_make_request(bio);
|
|
@@ -1310,10 +1479,22 @@ static void do_worker(struct work_struct *ws)
|
|
|
{
|
|
|
struct pool *pool = container_of(ws, struct pool, worker);
|
|
|
|
|
|
- process_prepared_mappings(pool);
|
|
|
+ process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
|
|
|
+ process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
|
|
|
process_deferred_bios(pool);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * We want to commit periodically so that not too much
|
|
|
+ * unwritten data builds up.
|
|
|
+ */
|
|
|
+static void do_waker(struct work_struct *ws)
|
|
|
+{
|
|
|
+ struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
|
|
|
+ wake_worker(pool);
|
|
|
+ queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
|
|
|
+}
|
|
|
+
|
|
|
/*----------------------------------------------------------------*/
|
|
|
|
|
|
/*
|
|
@@ -1335,6 +1516,19 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
|
|
|
wake_worker(pool);
|
|
|
}
|
|
|
|
|
|
+static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
|
|
|
+{
|
|
|
+ struct pool *pool = tc->pool;
|
|
|
+ struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
|
|
|
+
|
|
|
+ h->tc = tc;
|
|
|
+ h->shared_read_entry = NULL;
|
|
|
+ h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds);
|
|
|
+ h->overwrite_mapping = NULL;
|
|
|
+
|
|
|
+ return h;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Non-blocking function called from the thin target's map function.
|
|
|
*/
|
|
@@ -1347,12 +1541,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
|
|
|
struct dm_thin_device *td = tc->td;
|
|
|
struct dm_thin_lookup_result result;
|
|
|
|
|
|
- /*
|
|
|
- * Save the thin context for easy access from the deferred bio later.
|
|
|
- */
|
|
|
- map_context->ptr = tc;
|
|
|
-
|
|
|
- if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
|
|
|
+ map_context->ptr = thin_hook_bio(tc, bio);
|
|
|
+ if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
|
|
|
thin_defer_bio(tc, bio);
|
|
|
return DM_MAPIO_SUBMITTED;
|
|
|
}
|
|
@@ -1434,7 +1624,7 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
|
|
|
|
|
|
pool->ti = ti;
|
|
|
pool->low_water_blocks = pt->low_water_blocks;
|
|
|
- pool->zero_new_blocks = pt->zero_new_blocks;
|
|
|
+ pool->pf = pt->pf;
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -1448,6 +1638,14 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
|
|
|
/*----------------------------------------------------------------
|
|
|
* Pool creation
|
|
|
*--------------------------------------------------------------*/
|
|
|
+/* Initialize pool features. */
|
|
|
+static void pool_features_init(struct pool_features *pf)
|
|
|
+{
|
|
|
+ pf->zero_new_blocks = 1;
|
|
|
+ pf->discard_enabled = 1;
|
|
|
+ pf->discard_passdown = 1;
|
|
|
+}
|
|
|
+
|
|
|
static void __pool_destroy(struct pool *pool)
|
|
|
{
|
|
|
__pool_table_remove(pool);
|
|
@@ -1495,7 +1693,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
|
|
|
pool->block_shift = ffs(block_size) - 1;
|
|
|
pool->offset_mask = block_size - 1;
|
|
|
pool->low_water_blocks = 0;
|
|
|
- pool->zero_new_blocks = 1;
|
|
|
+ pool_features_init(&pool->pf);
|
|
|
pool->prison = prison_create(PRISON_CELLS);
|
|
|
if (!pool->prison) {
|
|
|
*error = "Error creating pool's bio prison";
|
|
@@ -1523,14 +1721,17 @@ static struct pool *pool_create(struct mapped_device *pool_md,
|
|
|
}
|
|
|
|
|
|
INIT_WORK(&pool->worker, do_worker);
|
|
|
+ INIT_DELAYED_WORK(&pool->waker, do_waker);
|
|
|
spin_lock_init(&pool->lock);
|
|
|
bio_list_init(&pool->deferred_bios);
|
|
|
bio_list_init(&pool->deferred_flush_bios);
|
|
|
INIT_LIST_HEAD(&pool->prepared_mappings);
|
|
|
+ INIT_LIST_HEAD(&pool->prepared_discards);
|
|
|
pool->low_water_triggered = 0;
|
|
|
pool->no_free_space = 0;
|
|
|
bio_list_init(&pool->retry_on_resume_list);
|
|
|
- ds_init(&pool->ds);
|
|
|
+ ds_init(&pool->shared_read_ds);
|
|
|
+ ds_init(&pool->all_io_ds);
|
|
|
|
|
|
pool->next_mapping = NULL;
|
|
|
pool->mapping_pool =
|
|
@@ -1549,6 +1750,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
|
|
|
goto bad_endio_hook_pool;
|
|
|
}
|
|
|
pool->ref_count = 1;
|
|
|
+ pool->last_commit_jiffies = jiffies;
|
|
|
pool->pool_md = pool_md;
|
|
|
pool->md_dev = metadata_dev;
|
|
|
__pool_table_insert(pool);
|
|
@@ -1588,7 +1790,8 @@ static void __pool_dec(struct pool *pool)
|
|
|
|
|
|
static struct pool *__pool_find(struct mapped_device *pool_md,
|
|
|
struct block_device *metadata_dev,
|
|
|
- unsigned long block_size, char **error)
|
|
|
+ unsigned long block_size, char **error,
|
|
|
+ int *created)
|
|
|
{
|
|
|
struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
|
|
|
|
|
@@ -1604,8 +1807,10 @@ static struct pool *__pool_find(struct mapped_device *pool_md,
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
__pool_inc(pool);
|
|
|
|
|
|
- } else
|
|
|
+ } else {
|
|
|
pool = pool_create(pool_md, metadata_dev, block_size, error);
|
|
|
+ *created = 1;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
return pool;
|
|
@@ -1629,10 +1834,6 @@ static void pool_dtr(struct dm_target *ti)
|
|
|
mutex_unlock(&dm_thin_pool_table.mutex);
|
|
|
}
|
|
|
|
|
|
-struct pool_features {
|
|
|
- unsigned zero_new_blocks:1;
|
|
|
-};
|
|
|
-
|
|
|
static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
|
|
|
struct dm_target *ti)
|
|
|
{
|
|
@@ -1641,7 +1842,7 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
|
|
|
const char *arg_name;
|
|
|
|
|
|
static struct dm_arg _args[] = {
|
|
|
- {0, 1, "Invalid number of pool feature arguments"},
|
|
|
+ {0, 3, "Invalid number of pool feature arguments"},
|
|
|
};
|
|
|
|
|
|
/*
|
|
@@ -1661,6 +1862,12 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
|
|
|
if (!strcasecmp(arg_name, "skip_block_zeroing")) {
|
|
|
pf->zero_new_blocks = 0;
|
|
|
continue;
|
|
|
+ } else if (!strcasecmp(arg_name, "ignore_discard")) {
|
|
|
+ pf->discard_enabled = 0;
|
|
|
+ continue;
|
|
|
+ } else if (!strcasecmp(arg_name, "no_discard_passdown")) {
|
|
|
+ pf->discard_passdown = 0;
|
|
|
+ continue;
|
|
|
}
|
|
|
|
|
|
ti->error = "Unrecognised pool feature requested";
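Together with pool_features_init() further down, the parser now recognises three optional words, each of which switches one default off. A stand-alone sketch of that flow (plain C for illustration, not the dm_arg_set API; feature names as in the patch):

#include <stdio.h>
#include <strings.h>

struct features {
	unsigned zero_new_blocks:1;
	unsigned discard_enabled:1;
	unsigned discard_passdown:1;
};

/* Everything defaults to on, as in pool_features_init(). */
static void features_init(struct features *pf)
{
	pf->zero_new_blocks = 1;
	pf->discard_enabled = 1;
	pf->discard_passdown = 1;
}

static int parse_feature(struct features *pf, const char *arg)
{
	if (!strcasecmp(arg, "skip_block_zeroing"))
		pf->zero_new_blocks = 0;
	else if (!strcasecmp(arg, "ignore_discard"))
		pf->discard_enabled = 0;
	else if (!strcasecmp(arg, "no_discard_passdown"))
		pf->discard_passdown = 0;
	else
		return -1;	/* unrecognised pool feature */
	return 0;
}

int main(void)
{
	struct features pf;
	const char *args[] = { "skip_block_zeroing", "no_discard_passdown" };

	features_init(&pf);
	for (unsigned long i = 0; i < sizeof(args) / sizeof(args[0]); i++)
		if (parse_feature(&pf, args[i]))
			fprintf(stderr, "unknown feature %s\n", args[i]);

	printf("zero=%u discard=%u passdown=%u\n",
	       pf.zero_new_blocks, pf.discard_enabled, pf.discard_passdown);
	return 0;
}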
|
|
@@ -1678,10 +1885,12 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
|
|
|
*
|
|
|
* Optional feature arguments are:
|
|
|
* skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
|
|
|
+ * ignore_discard: disable discard
|
|
|
+ * no_discard_passdown: don't pass discards down to the data device
|
|
|
*/
|
|
|
static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
{
|
|
|
- int r;
|
|
|
+ int r, pool_created = 0;
|
|
|
struct pool_c *pt;
|
|
|
struct pool *pool;
|
|
|
struct pool_features pf;
|
|
@@ -1691,6 +1900,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
dm_block_t low_water_blocks;
|
|
|
struct dm_dev *metadata_dev;
|
|
|
sector_t metadata_dev_size;
|
|
|
+ char b[BDEVNAME_SIZE];
|
|
|
|
|
|
/*
|
|
|
* FIXME Remove validation from scope of lock.
|
|
@@ -1712,11 +1922,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
}
|
|
|
|
|
|
metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT;
|
|
|
- if (metadata_dev_size > METADATA_DEV_MAX_SECTORS) {
|
|
|
- ti->error = "Metadata device is too large";
|
|
|
- r = -EINVAL;
|
|
|
- goto out_metadata;
|
|
|
- }
|
|
|
+ if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
|
|
|
+ DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
|
|
|
+ bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
|
|
|
|
|
|
r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
|
|
|
if (r) {
|
|
@@ -1742,8 +1950,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
/*
|
|
|
* Set default pool features.
|
|
|
*/
|
|
|
- memset(&pf, 0, sizeof(pf));
|
|
|
- pf.zero_new_blocks = 1;
|
|
|
+ pool_features_init(&pf);
|
|
|
|
|
|
dm_consume_args(&as, 4);
|
|
|
r = parse_pool_features(&as, &pf, ti);
|
|
@@ -1757,20 +1964,58 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
}
|
|
|
|
|
|
pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
|
|
|
- block_size, &ti->error);
|
|
|
+ block_size, &ti->error, &pool_created);
|
|
|
if (IS_ERR(pool)) {
|
|
|
r = PTR_ERR(pool);
|
|
|
goto out_free_pt;
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * 'pool_created' reflects whether this is the first table load.
|
|
|
+ * Top level discard support is not allowed to be changed after
|
|
|
+ * initial load. This would require a pool reload to trigger thin
|
|
|
+ * device changes.
|
|
|
+ */
|
|
|
+ if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
|
|
|
+ ti->error = "Discard support cannot be disabled once enabled";
|
|
|
+ r = -EINVAL;
|
|
|
+ goto out_flags_changed;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If discard_passdown was enabled verify that the data device
|
|
|
+ * supports discards. Disable discard_passdown if not; otherwise
|
|
|
+ * -EOPNOTSUPP will be returned.
|
|
|
+ */
|
|
|
+ if (pf.discard_passdown) {
|
|
|
+ struct request_queue *q = bdev_get_queue(data_dev->bdev);
|
|
|
+ if (!q || !blk_queue_discard(q)) {
|
|
|
+ DMWARN("Discard unsupported by data device: Disabling discard passdown.");
|
|
|
+ pf.discard_passdown = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
pt->pool = pool;
|
|
|
pt->ti = ti;
|
|
|
pt->metadata_dev = metadata_dev;
|
|
|
pt->data_dev = data_dev;
|
|
|
pt->low_water_blocks = low_water_blocks;
|
|
|
- pt->zero_new_blocks = pf.zero_new_blocks;
|
|
|
+ pt->pf = pf;
|
|
|
ti->num_flush_requests = 1;
|
|
|
- ti->num_discard_requests = 0;
|
|
|
+ /*
|
|
|
+ * Only need to enable discards if the pool should pass
|
|
|
+ * them down to the data device. The thin device's discard
|
|
|
+ * processing will cause mappings to be removed from the btree.
|
|
|
+ */
|
|
|
+ if (pf.discard_enabled && pf.discard_passdown) {
|
|
|
+ ti->num_discard_requests = 1;
|
|
|
+ /*
|
|
|
+ * Setting 'discards_supported' circumvents the normal
|
|
|
+ * stacking of discard limits (this keeps the pool and
|
|
|
+ * thin devices' discard limits consistent).
|
|
|
+ */
|
|
|
+ ti->discards_supported = 1;
|
|
|
+ }
|
|
|
ti->private = pt;
|
|
|
|
|
|
pt->callbacks.congested_fn = pool_is_congested;
|
|
@@ -1780,6 +2025,8 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
+out_flags_changed:
|
|
|
+ __pool_dec(pool);
|
|
|
out_free_pt:
|
|
|
kfree(pt);
|
|
|
out:
|
|
@@ -1878,7 +2125,7 @@ static void pool_resume(struct dm_target *ti)
|
|
|
__requeue_bios(pool);
|
|
|
spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
|
|
|
- wake_worker(pool);
|
|
|
+ do_waker(&pool->waker.work);
|
|
|
}
|
|
|
|
|
|
static void pool_postsuspend(struct dm_target *ti)
|
|
@@ -1887,6 +2134,7 @@ static void pool_postsuspend(struct dm_target *ti)
|
|
|
struct pool_c *pt = ti->private;
|
|
|
struct pool *pool = pt->pool;
|
|
|
|
|
|
+ cancel_delayed_work(&pool->waker);
|
|
|
flush_workqueue(pool->wq);
|
|
|
|
|
|
r = dm_pool_commit_metadata(pool->pmd);
|
|
@@ -2067,7 +2315,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
static int pool_status(struct dm_target *ti, status_type_t type,
|
|
|
char *result, unsigned maxlen)
|
|
|
{
|
|
|
- int r;
|
|
|
+ int r, count;
|
|
|
unsigned sz = 0;
|
|
|
uint64_t transaction_id;
|
|
|
dm_block_t nr_free_blocks_data;
|
|
@@ -2130,10 +2378,19 @@ static int pool_status(struct dm_target *ti, status_type_t type,
|
|
|
(unsigned long)pool->sectors_per_block,
|
|
|
(unsigned long long)pt->low_water_blocks);
|
|
|
|
|
|
- DMEMIT("%u ", !pool->zero_new_blocks);
|
|
|
+ count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled +
|
|
|
+ !pool->pf.discard_passdown;
|
|
|
+ DMEMIT("%u ", count);
|
|
|
|
|
|
- if (!pool->zero_new_blocks)
|
|
|
+ if (!pool->pf.zero_new_blocks)
|
|
|
DMEMIT("skip_block_zeroing ");
|
|
|
+
|
|
|
+ if (!pool->pf.discard_enabled)
|
|
|
+ DMEMIT("ignore_discard ");
|
|
|
+
|
|
|
+ if (!pool->pf.discard_passdown)
|
|
|
+ DMEMIT("no_discard_passdown ");
|
|
|
+
|
|
|
break;
|
|
|
}
|
|
|
|
|
@@ -2162,6 +2419,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
|
|
|
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
|
|
|
}
|
|
|
|
|
|
+static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * FIXME: these limits may be incompatible with the pool's data device
|
|
|
+ */
|
|
|
+ limits->max_discard_sectors = pool->sectors_per_block;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * This is just a hint, and not enforced. We have to cope with
|
|
|
+ * bios that overlap 2 blocks.
|
|
|
+ */
|
|
|
+ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
|
|
|
+ limits->discard_zeroes_data = pool->pf.zero_new_blocks;
|
|
|
+}
|
|
|
+
|
|
|
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|
|
{
|
|
|
struct pool_c *pt = ti->private;
|
|
@@ -2169,13 +2441,15 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|
|
|
|
|
blk_limits_io_min(limits, 0);
|
|
|
blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
|
|
|
+ if (pool->pf.discard_enabled)
|
|
|
+ set_discard_limits(pool, limits);
|
|
|
}
|
|
|
|
|
|
static struct target_type pool_target = {
|
|
|
.name = "thin-pool",
|
|
|
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
|
|
|
DM_TARGET_IMMUTABLE,
|
|
|
- .version = {1, 0, 0},
|
|
|
+ .version = {1, 1, 0},
|
|
|
.module = THIS_MODULE,
|
|
|
.ctr = pool_ctr,
|
|
|
.dtr = pool_dtr,
|
|
@@ -2202,6 +2476,8 @@ static void thin_dtr(struct dm_target *ti)
|
|
|
__pool_dec(tc->pool);
|
|
|
dm_pool_close_thin_device(tc->td);
|
|
|
dm_put_device(ti, tc->pool_dev);
|
|
|
+ if (tc->origin_dev)
|
|
|
+ dm_put_device(ti, tc->origin_dev);
|
|
|
kfree(tc);
|
|
|
|
|
|
mutex_unlock(&dm_thin_pool_table.mutex);
|
|
@@ -2210,21 +2486,25 @@ static void thin_dtr(struct dm_target *ti)
|
|
|
/*
|
|
|
* Thin target parameters:
|
|
|
*
|
|
|
- * <pool_dev> <dev_id>
|
|
|
+ * <pool_dev> <dev_id> [origin_dev]
|
|
|
*
|
|
|
* pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
|
|
|
* dev_id: the internal device identifier
|
|
|
+ * origin_dev: a device external to the pool that should act as the origin
|
|
|
+ *
|
|
|
+ * If the pool device has discards disabled, they get disabled for the thin
|
|
|
+ * device as well.
|
|
|
*/
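For example (device names and sizes purely illustrative), a plain thin volume and an external snapshot of a read-only image might be loaded as "0 2097152 thin /dev/mapper/pool 1" and "0 2097152 thin /dev/mapper/pool 2 /dev/vg/base-image" respectively; the only difference is the trailing origin_dev argument.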
|
|
|
static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
{
|
|
|
int r;
|
|
|
struct thin_c *tc;
|
|
|
- struct dm_dev *pool_dev;
|
|
|
+ struct dm_dev *pool_dev, *origin_dev;
|
|
|
struct mapped_device *pool_md;
|
|
|
|
|
|
mutex_lock(&dm_thin_pool_table.mutex);
|
|
|
|
|
|
- if (argc != 2) {
|
|
|
+ if (argc != 2 && argc != 3) {
|
|
|
ti->error = "Invalid argument count";
|
|
|
r = -EINVAL;
|
|
|
goto out_unlock;
|
|
@@ -2237,6 +2517,15 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
goto out_unlock;
|
|
|
}
|
|
|
|
|
|
+ if (argc == 3) {
|
|
|
+ r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
|
|
|
+ if (r) {
|
|
|
+ ti->error = "Error opening origin device";
|
|
|
+ goto bad_origin_dev;
|
|
|
+ }
|
|
|
+ tc->origin_dev = origin_dev;
|
|
|
+ }
|
|
|
+
|
|
|
r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
|
|
|
if (r) {
|
|
|
ti->error = "Error opening pool device";
|
|
@@ -2273,8 +2562,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
|
|
|
ti->split_io = tc->pool->sectors_per_block;
|
|
|
ti->num_flush_requests = 1;
|
|
|
- ti->num_discard_requests = 0;
|
|
|
- ti->discards_supported = 0;
|
|
|
+
|
|
|
+ /* In case the pool supports discards, pass them on. */
|
|
|
+ if (tc->pool->pf.discard_enabled) {
|
|
|
+ ti->discards_supported = 1;
|
|
|
+ ti->num_discard_requests = 1;
|
|
|
+ }
|
|
|
|
|
|
dm_put(pool_md);
|
|
|
|
|
@@ -2289,6 +2582,9 @@ bad_pool_lookup:
|
|
|
bad_common:
|
|
|
dm_put_device(ti, tc->pool_dev);
|
|
|
bad_pool_dev:
|
|
|
+ if (tc->origin_dev)
|
|
|
+ dm_put_device(ti, tc->origin_dev);
|
|
|
+bad_origin_dev:
|
|
|
kfree(tc);
|
|
|
out_unlock:
|
|
|
mutex_unlock(&dm_thin_pool_table.mutex);
|
|
@@ -2299,11 +2595,46 @@ out_unlock:
|
|
|
static int thin_map(struct dm_target *ti, struct bio *bio,
|
|
|
union map_info *map_context)
|
|
|
{
|
|
|
- bio->bi_sector -= ti->begin;
|
|
|
+ bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
|
|
|
|
|
|
return thin_bio_map(ti, bio, map_context);
|
|
|
}
|
|
|
|
|
|
+static int thin_endio(struct dm_target *ti,
|
|
|
+ struct bio *bio, int err,
|
|
|
+ union map_info *map_context)
|
|
|
+{
|
|
|
+ unsigned long flags;
|
|
|
+ struct endio_hook *h = map_context->ptr;
|
|
|
+ struct list_head work;
|
|
|
+ struct new_mapping *m, *tmp;
|
|
|
+ struct pool *pool = h->tc->pool;
|
|
|
+
|
|
|
+ if (h->shared_read_entry) {
|
|
|
+ INIT_LIST_HEAD(&work);
|
|
|
+ ds_dec(h->shared_read_entry, &work);
|
|
|
+
|
|
|
+ spin_lock_irqsave(&pool->lock, flags);
|
|
|
+ list_for_each_entry_safe(m, tmp, &work, list) {
|
|
|
+ list_del(&m->list);
|
|
|
+ m->quiesced = 1;
|
|
|
+ __maybe_add_mapping(m);
|
|
|
+ }
|
|
|
+ spin_unlock_irqrestore(&pool->lock, flags);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (h->all_io_entry) {
|
|
|
+ INIT_LIST_HEAD(&work);
|
|
|
+ ds_dec(h->all_io_entry, &work);
|
|
|
+ list_for_each_entry_safe(m, tmp, &work, list)
|
|
|
+ list_add(&m->list, &pool->prepared_discards);
|
|
|
+ }
|
|
|
+
|
|
|
+ mempool_free(h, pool->endio_hook_pool);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
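thin_endio() is where the two deferred sets pay off: every data bio took an all_io_ds entry in thin_hook_bio(), shared reads additionally took a shared_read_ds entry, and dropping those entries here is what finally quiesces the mappings and discards that were parked with ds_add_work(). A rough single-generation model of that counting follows (the real deferred_set is generational and hands out entry pointers, so this is only the shape of the idea):

#include <stdio.h>

struct mock_ds {
	int in_flight;		/* bios currently holding an entry */
	int parked_work;	/* work items waiting for quiesce */
};

/* ds_inc(): a bio enters the set. */
static void ds_inc(struct mock_ds *ds) { ds->in_flight++; }

/* ds_add_work(): returns 0 if the work can run immediately. */
static int ds_add_work(struct mock_ds *ds)
{
	if (!ds->in_flight)
		return 0;
	ds->parked_work++;
	return 1;
}

/* ds_dec(): a bio completes; report any work that is now unblocked. */
static void ds_dec(struct mock_ds *ds)
{
	if (--ds->in_flight == 0 && ds->parked_work) {
		printf("%d parked mapping(s) now quiesced\n", ds->parked_work);
		ds->parked_work = 0;
	}
}

int main(void)
{
	struct mock_ds all_io = { 0 };

	ds_inc(&all_io);			/* a write is in flight */
	if (ds_add_work(&all_io))		/* discard must wait for it */
		printf("discard parked behind in-flight IO\n");
	ds_dec(&all_io);			/* write completes -> discard can proceed */
	return 0;
}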
|
|
|
+
|
|
|
static void thin_postsuspend(struct dm_target *ti)
|
|
|
{
|
|
|
if (dm_noflush_suspending(ti))
|
|
@@ -2347,6 +2678,8 @@ static int thin_status(struct dm_target *ti, status_type_t type,
|
|
|
DMEMIT("%s %lu",
|
|
|
format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
|
|
|
(unsigned long) tc->dev_id);
|
|
|
+ if (tc->origin_dev)
|
|
|
+ DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
@@ -2377,18 +2710,21 @@ static int thin_iterate_devices(struct dm_target *ti,
|
|
|
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|
|
{
|
|
|
struct thin_c *tc = ti->private;
|
|
|
+ struct pool *pool = tc->pool;
|
|
|
|
|
|
blk_limits_io_min(limits, 0);
|
|
|
- blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT);
|
|
|
+ blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
|
|
|
+ set_discard_limits(pool, limits);
|
|
|
}
|
|
|
|
|
|
static struct target_type thin_target = {
|
|
|
.name = "thin",
|
|
|
- .version = {1, 0, 0},
|
|
|
+ .version = {1, 1, 0},
|
|
|
.module = THIS_MODULE,
|
|
|
.ctr = thin_ctr,
|
|
|
.dtr = thin_dtr,
|
|
|
.map = thin_map,
|
|
|
+ .end_io = thin_endio,
|
|
|
.postsuspend = thin_postsuspend,
|
|
|
.status = thin_status,
|
|
|
.iterate_devices = thin_iterate_devices,
|