@@ -27,7 +27,7 @@
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
 #include <linux/lockdep.h>
-#include "hash.h"
+#include <linux/crc32c.h>
 #include "tree-log.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -535,13 +535,11 @@ static noinline void caching_thread(struct btrfs_work *work)
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct btrfs_root *extent_root;
 	int ret;
 
 	caching_ctl = container_of(work, struct btrfs_caching_control, work);
 	block_group = caching_ctl->block_group;
 	fs_info = block_group->fs_info;
-	extent_root = fs_info->extent_root;
 
 	mutex_lock(&caching_ctl->mutex);
 	down_read(&fs_info->commit_root_sem);
@@ -1203,11 +1201,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
 	__le64 lenum;
 
 	lenum = cpu_to_le64(root_objectid);
-	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(owner);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(offset);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 
 	return ((u64)high_crc << 31) ^ (u64)low_crc;
 }
@@ -2652,9 +2650,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
  */
 static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-					     struct btrfs_fs_info *fs_info,
 					     unsigned long nr)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
@@ -2994,7 +2992,7 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 	if (trans->transid > async->transid)
 		goto end;
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, async->count);
+	ret = btrfs_run_delayed_refs(trans, async->count);
 	if (ret)
 		async->error = ret;
 end:
@@ -3053,8 +3051,9 @@ int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
  * Returns <0 on error and aborts the transaction
  */
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, unsigned long count)
+			   unsigned long count)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_head *head;
@@ -3078,7 +3077,7 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 	trans->can_flush_pending_bgs = false;
-	ret = __btrfs_run_delayed_refs(trans, fs_info, count);
+	ret = __btrfs_run_delayed_refs(trans, count);
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, ret);
 		return ret;
@@ -3086,7 +3085,7 @@ again:
 
 	if (run_all) {
 		if (!list_empty(&trans->new_bgs))
-			btrfs_create_pending_block_groups(trans, fs_info);
+			btrfs_create_pending_block_groups(trans);
 
 		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->href_root);
@@ -3660,9 +3659,9 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
  * the commit latency by getting rid of the easy block groups while
  * we're still allowing others to join the commit.
  */
-int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info)
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
@@ -3686,7 +3685,7 @@ again:
 	 * make sure all the block groups on our dirty list actually
 	 * exist
 	 */
-	btrfs_create_pending_block_groups(trans, fs_info);
+	btrfs_create_pending_block_groups(trans);
 
 	if (!path) {
 		path = btrfs_alloc_path();
@@ -3741,8 +3740,9 @@ again:
 				should_put = 0;
 
 				/*
-				 * the cache_write_mutex is protecting
-				 * the io_list
+				 * The cache_write_mutex is protecting the
+				 * io_list, also refer to the definition of
+				 * btrfs_transaction::io_bgs for more details
 				 */
 				list_add_tail(&cache->io_list, io);
 			} else {
@@ -3800,7 +3800,7 @@ again:
 	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
 	 */
-	ret = btrfs_run_delayed_refs(trans, fs_info, 0);
+	ret = btrfs_run_delayed_refs(trans, 0);
 	if (!ret && loops == 0) {
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3882,7 +3882,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		cache_save_setup(cache, trans, path);
 
 		if (!ret)
-			ret = btrfs_run_delayed_refs(trans, fs_info,
+			ret = btrfs_run_delayed_refs(trans,
 						     (unsigned long) -1);
 
 		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
@@ -3934,6 +3934,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
+	/*
+	 * Refer to the definition of io_bgs member for details why it's safe
+	 * to use it without any locking
+	 */
 	while (!list_empty(io)) {
 		cache = list_first_entry(io, struct btrfs_block_group_cache,
 					 io_list);
@@ -4332,8 +4336,7 @@ again:
 
 	/* commit the current transaction and try again */
 commit_trans:
-	if (need_commit &&
-	    !atomic_read(&fs_info->open_ioctl_trans)) {
+	if (need_commit) {
 		need_commit--;
 
 		if (need_commit > 0) {
@@ -4541,7 +4544,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
 	 * Needed because we can end up allocating a system chunk and for an
 	 * atomic and race free space reservation in the chunk block reserve.
 	 */
-	ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
+	lockdep_assert_held(&fs_info->chunk_mutex);
 
 	info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
 	spin_lock(&info->lock);
@@ -4602,11 +4605,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 
 	space_info = __find_space_info(fs_info, flags);
-	if (!space_info) {
-		ret = create_space_info(fs_info, flags, &space_info);
-		if (ret)
-			return ret;
-	}
+	ASSERT(space_info);
 
 again:
 	spin_lock(&space_info->lock);
@@ -4705,7 +4704,7 @@ out:
 	 */
 	if (trans->can_flush_pending_bgs &&
 	    trans->chunk_bytes_reserved >= (u64)SZ_2M) {
-		btrfs_create_pending_block_groups(trans, fs_info);
+		btrfs_create_pending_block_groups(trans);
 		btrfs_trans_release_chunk_metadata(trans);
 	}
 	return ret;
@@ -4826,7 +4825,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 	long time_left;
 	unsigned long nr_pages;
 	int loops;
-	enum btrfs_reserve_flush_enum flush;
 
 	/* Calc the number of the pages we need flush for space reservation */
 	items = calc_reclaim_items_nr(fs_info, to_reclaim);
@@ -4867,10 +4865,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 			   atomic_read(&fs_info->async_delalloc_pages) <=
 			   (int)max_reclaim);
 skip_async:
-		if (!trans)
-			flush = BTRFS_RESERVE_FLUSH_ALL;
-		else
-			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
 		if (list_empty(&space_info->tickets) &&
 		    list_empty(&space_info->priority_tickets)) {
@@ -4993,7 +4987,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 			ret = PTR_ERR(trans);
 			break;
 		}
-		ret = btrfs_run_delayed_items_nr(trans, fs_info, nr);
+		ret = btrfs_run_delayed_items_nr(trans, nr);
 		btrfs_end_transaction(trans);
 		break;
 	case FLUSH_DELALLOC:
@@ -5388,10 +5382,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
 		    !block_rsv_use_bytes(global_rsv, orig_bytes))
 			ret = 0;
 	}
-	if (ret == -ENOSPC)
+	if (ret == -ENOSPC) {
 		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
 					      block_rsv->space_info->flags,
 					      orig_bytes, 1);
+
+		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+			dump_space_info(fs_info, block_rsv->space_info,
+					orig_bytes, 0);
+	}
 	return ret;
 }
 
@@ -5760,6 +5759,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 	if (num_bytes == 0)
 		return 0;
 
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+	if (ret)
+		return ret;
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 0);
@@ -5772,11 +5774,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 /**
  * btrfs_inode_rsv_release - release any excessive reservation.
  * @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ * Unlike normal operation, qgroup meta reservation needs to know if we are
+ * freeing qgroup reservation or just converting it into per-trans. Normally
+ * @qgroup_free is true for error handling, and false for normal release.
  *
  * This is the same as btrfs_block_rsv_release, except that it handles the
  * tracepoint for the reservation.
  */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@@ -5792,6 +5798,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
 	if (released > 0)
 		trace_btrfs_space_reservation(fs_info, "delalloc",
 					      btrfs_ino(inode), released, 0);
+	if (qgroup_free)
+		btrfs_qgroup_free_meta_prealloc(inode->root, released);
+	else
+		btrfs_qgroup_convert_reserved_meta(inode->root, released);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5892,24 +5902,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
 }
 
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info)
-{
-	if (!trans->block_rsv) {
-		ASSERT(!trans->bytes_reserved);
-		return;
-	}
-
-	if (!trans->bytes_reserved)
-		return;
-
-	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
-	trace_btrfs_space_reservation(fs_info, "transaction",
-				      trans->transid, trans->bytes_reserved, 0);
-	btrfs_block_rsv_release(fs_info, trans->block_rsv,
-				trans->bytes_reserved);
-	trans->bytes_reserved = 0;
-}
 
 /*
  * To be called after all the new block groups attached to the transaction
@@ -5951,7 +5943,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
 	 */
 	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
+	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
 			num_bytes, 1);
 	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
@@ -5995,7 +5987,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
 		/* One for parent inode, two for dir entries */
 		num_bytes = 3 * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
+		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
 		if (ret)
 			return ret;
 	} else {
@@ -6014,7 +6006,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
 	if (ret && *qgroup_reserved)
-		btrfs_qgroup_free_meta(root, *qgroup_reserved);
+		btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
 
 	return ret;
 }
@@ -6051,7 +6043,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
 	unsigned nr_extents;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
@@ -6068,13 +6059,13 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_free_space_inode(inode)) {
 		flush = BTRFS_RESERVE_NO_FLUSH;
 		delalloc_lock = false;
-	} else if (current->journal_info) {
-		flush = BTRFS_RESERVE_FLUSH_LIMIT;
-	}
+	} else {
+		if (current->journal_info)
+			flush = BTRFS_RESERVE_FLUSH_LIMIT;
 
-	if (flush != BTRFS_RESERVE_NO_FLUSH &&
-	    btrfs_transaction_in_commit(fs_info))
-		schedule_timeout(1);
+		if (btrfs_transaction_in_commit(fs_info))
+			schedule_timeout(1);
+	}
 
 	if (delalloc_lock)
 		mutex_lock(&inode->delalloc_mutex);
@@ -6089,19 +6080,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-		ret = btrfs_qgroup_reserve_meta(root,
-				nr_extents * fs_info->nodesize, true);
-		if (ret)
-			goto out_fail;
-	}
-
 	ret = btrfs_inode_rsv_refill(inode, flush);
-	if (unlikely(ret)) {
-		btrfs_qgroup_free_meta(root,
-				       nr_extents * fs_info->nodesize);
+	if (unlikely(ret))
 		goto out_fail;
-	}
 
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
@@ -6115,7 +6096,7 @@ out_fail:
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, true);
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
 	return ret;
@@ -6125,12 +6106,14 @@ out_fail:
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
  * @inode: the inode to release the reservation for.
  * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
 *
 * This will release the metadata reservation for an inode. This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations, or on error for the same reason.
 */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				     bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
@@ -6143,13 +6126,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
 * btrfs_delalloc_release_extents - release our outstanding_extents
 * @inode: the inode to balance the reservation for.
 * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
 *
 * When we reserve space we increase outstanding_extents for the extents we may
 * add. Once we've set the range as delalloc or created our ordered extents we
@@ -6157,7 +6141,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
 * with btrfs_delalloc_reserve_metadata.
 */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+				    bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 	unsigned num_extents;
@@ -6171,7 +6156,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
@@ -6227,9 +6212,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 */
 void btrfs_delalloc_release_space(struct inode *inode,
 				  struct extent_changeset *reserved,
-				  u64 start, u64 len)
+				  u64 start, u64 len, bool qgroup_free)
 {
-	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
+	btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
 	btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
 
@@ -6783,9 +6768,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info)
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct list_head *deleted_bgs;
 	struct extent_io_tree *unpin;
@@ -7351,29 +7336,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 	return ret;
 }
 
-int __get_raid_index(u64 flags)
-{
-	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		return BTRFS_RAID_RAID10;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		return BTRFS_RAID_RAID1;
-	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		return BTRFS_RAID_DUP;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		return BTRFS_RAID_RAID0;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
-		return BTRFS_RAID_RAID5;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
-		return BTRFS_RAID_RAID6;
-
-	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
-}
-
-int get_block_group_index(struct btrfs_block_group_cache *cache)
-{
-	return __get_raid_index(cache->flags);
-}
-
 static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = "raid10",
 	[BTRFS_RAID_RAID1] = "raid1",
@@ -7488,7 +7450,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	u64 empty_cluster = 0;
 	struct btrfs_space_info *space_info;
 	int loop = 0;
-	int index = __get_raid_index(flags);
+	int index = btrfs_bg_flags_to_raid_index(flags);
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
 	bool use_cluster = true;
@@ -7574,7 +7536,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 			btrfs_put_block_group(block_group);
 			up_read(&space_info->groups_sem);
 		} else {
-			index = get_block_group_index(block_group);
+			index = btrfs_bg_flags_to_raid_index(
+							block_group->flags);
 			btrfs_lock_block_group(block_group, delalloc);
 			goto have_block_group;
 		}
@@ -7584,7 +7547,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	}
 search:
 	have_caching_bg = false;
-	if (index == 0 || index == __get_raid_index(flags))
+	if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags))
 		full_search = true;
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups[index],
@@ -7842,7 +7805,8 @@ checks:
 loop:
 		failed_cluster_refill = false;
 		failed_alloc = false;
-		BUG_ON(index != get_block_group_index(block_group));
+		BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
+		       index);
 		btrfs_release_block_group(block_group, delalloc);
 		cond_resched();
 	}
@@ -7996,6 +7960,51 @@ again:
 	up_read(&info->groups_sem);
 }
 
+/*
+ * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
+ *			  hole that is at least as big as @num_bytes.
+ *
+ * @root           -	The root that will contain this extent
+ *
+ * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
+ *			is used for accounting purposes. This value differs
+ *			from @num_bytes only in the case of compressed extents.
+ *
+ * @num_bytes      -	Number of bytes to allocate on-disk.
+ *
+ * @min_alloc_size -	Indicates the minimum amount of space that the
+ *			allocator should try to satisfy. In some cases
+ *			@num_bytes may be larger than what is required and if
+ *			the filesystem is fragmented then allocation fails.
+ *			However, the presence of @min_alloc_size gives a
+ *			chance to try and satisfy the smaller allocation.
+ *
+ * @empty_size     -	A hint that you plan on doing more COW. This is the
+ *			size in bytes the allocator should try to find free
+ *			next to the block it returns. This is just a hint and
+ *			may be ignored by the allocator.
+ *
+ * @hint_byte      -	Hint to the allocator to start searching above the byte
+ *			address passed. It might be ignored.
+ *
+ * @ins            -	This key is modified to record the found hole. It will
+ *			have the following values:
+ *			ins->objectid == start position
+ *			ins->flags = BTRFS_EXTENT_ITEM_KEY
+ *			ins->offset == the size of the hole.
+ *
+ * @is_data        -	Boolean flag indicating whether an extent is
+ *			allocated for data (true) or metadata (false)
+ *
+ * @delalloc       -	Boolean flag indicating whether this allocation is for
+ *			delalloc or not. If 'true' data_rwsem of block groups
+ *			is going to be acquired.
+ *
+ *
+ * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
+ * case -ENOSPC is returned then @ins->offset will contain the size of the
+ * largest available hole the allocator managed to find.
+ */
 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
@@ -8699,6 +8708,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	u64 parent;
 	u32 blocksize;
 	struct btrfs_key key;
+	struct btrfs_key first_key;
 	struct extent_buffer *next;
 	int level = wc->level;
 	int reada = 0;
@@ -8719,6 +8729,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+	btrfs_node_key_to_cpu(path->nodes[level], &first_key,
+			      path->slots[level]);
 	blocksize = fs_info->nodesize;
 
 	next = find_extent_buffer(fs_info, bytenr);
@@ -8783,7 +8795,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	if (!next) {
 		if (reada && level == 1)
 			reada_walk_down(trans, root, wc, path);
-		next = read_tree_block(fs_info, bytenr, generation);
+		next = read_tree_block(fs_info, bytenr, generation, level - 1,
+				       &first_key);
 		if (IS_ERR(next)) {
 			return PTR_ERR(next);
 		} else if (!extent_buffer_uptodate(next)) {
@@ -9648,7 +9661,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 	 */
 	target = get_restripe_target(fs_info, block_group->flags);
 	if (target) {
-		index = __get_raid_index(extended_to_chunk(target));
+		index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
 	} else {
 		/*
 		 * this is just a balance, so if we were marked as full
@@ -9662,7 +9675,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 			goto out;
 		}
 
-		index = get_block_group_index(block_group);
+		index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	}
 
 	if (index == BTRFS_RAID_RAID10) {
@@ -9911,10 +9924,40 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	return 0;
 }
 
+/* link_block_group will queue up kobjects to add when we're reclaim-safe */
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_space_info *space_info;
+	struct raid_kobject *rkobj;
+	LIST_HEAD(list);
+	int index;
+	int ret = 0;
+
+	spin_lock(&fs_info->pending_raid_kobjs_lock);
+	list_splice_init(&fs_info->pending_raid_kobjs, &list);
+	spin_unlock(&fs_info->pending_raid_kobjs_lock);
+
+	list_for_each_entry(rkobj, &list, list) {
+		space_info = __find_space_info(fs_info, rkobj->flags);
+		index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
+		if (ret) {
+			kobject_put(&rkobj->kobj);
+			break;
+		}
+	}
+	if (ret)
+		btrfs_warn(fs_info,
+			   "failed to add kobject for block cache, ignoring");
+}
+
 static void link_block_group(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
-	int index = get_block_group_index(cache);
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	int index = btrfs_bg_flags_to_raid_index(cache->flags);
 	bool first = false;
 
 	down_write(&space_info->groups_sem);
@@ -9924,27 +9967,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct raid_kobject *rkobj;
-		int ret;
-
-		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
-		if (!rkobj)
-			goto out_err;
-		rkobj->raid_type = index;
-		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
-				  "%s", get_raid_name(index));
-		if (ret) {
-			kobject_put(&rkobj->kobj);
-			goto out_err;
+		struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		if (!rkobj) {
+			btrfs_warn(cache->fs_info,
+				"couldn't alloc memory for raid level kobject");
+			return;
 		}
+		rkobj->flags = cache->flags;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+
+		spin_lock(&fs_info->pending_raid_kobjs_lock);
+		list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
+		spin_unlock(&fs_info->pending_raid_kobjs_lock);
 		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
-
-	return;
-out_err:
-	btrfs_warn(cache->fs_info,
-		   "failed to add kobject for block cache, ignoring");
 }
 
 static struct btrfs_block_group_cache *
@@ -10160,6 +10196,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 			inc_block_group_ro(cache, 1);
 	}
 
+	btrfs_add_raid_kobjects(info);
 	init_global_block_rsv(info);
 	ret = 0;
 error:
@@ -10167,9 +10204,9 @@ error:
 	return ret;
 }
 
-void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info)
+void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_block_group_item item;
@@ -10254,15 +10291,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	 * with its ->space_info set.
 	 */
 	cache->space_info = __find_space_info(fs_info, cache->flags);
-	if (!cache->space_info) {
-		ret = create_space_info(fs_info, cache->flags,
-					&cache->space_info);
-		if (ret) {
-			btrfs_remove_free_space_cache(cache);
-			btrfs_put_block_group(cache);
-			return ret;
-		}
-	}
+	ASSERT(cache->space_info);
 
 	ret = btrfs_add_block_group_cache(fs_info, cache);
 	if (ret) {
@@ -10334,7 +10363,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 					     block_group->key.offset);
 
 	memcpy(&key, &block_group->key, sizeof(key));
-	index = get_block_group_index(block_group);
+	index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
 				  BTRFS_BLOCK_GROUP_RAID1 |
 				  BTRFS_BLOCK_GROUP_RAID10))