@@ -66,7 +66,6 @@ struct scrub_ctx;
 struct scrub_recover {
         atomic_t                refs;
         struct btrfs_bio        *bbio;
-        u64                     *raid_map;
         u64                     map_length;
 };
 
@@ -80,7 +79,7 @@ struct scrub_page {
         u64                     logical;
         u64                     physical;
         u64                     physical_for_dev_replace;
-        atomic_t                ref_count;
+        atomic_t                refs;
         struct {
                 unsigned int    mirror_num:8;
                 unsigned int    have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
         struct scrub_page       *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
         int                     page_count;
         atomic_t                outstanding_pages;
-        atomic_t                ref_count; /* free mem on transition to zero */
+        atomic_t                refs; /* free mem on transition to zero */
         struct scrub_ctx        *sctx;
         struct scrub_parity     *sparity;
         struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
 
         int                     stripe_len;
 
-        atomic_t                ref_count;
+        atomic_t                refs;
 
         struct list_head        spages;
 
@@ -194,6 +193,15 @@ struct scrub_ctx {
          */
         struct btrfs_scrub_progress stat;
         spinlock_t              stat_lock;
+
+        /*
+         * Use a ref counter to avoid use-after-free issues. Scrub workers
+         * decrement bios_in_flight and workers_pending and then do a wakeup
+         * on the list_wait wait queue. We must ensure the main scrub task
+         * doesn't free the scrub context before or while the workers are
+         * doing the wakeup() call.
+         */
+        atomic_t                refs;
 };
 
 struct scrub_fixup_nodatasum {
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-                                     struct btrfs_fs_info *fs_info,
-                                     struct scrub_block *original_sblock,
-                                     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                                      struct scrub_block *sblocks_for_recheck);
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                 struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
                                          const u8 *csum, u64 generation,
                                          u16 csum_size);
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-                                             struct scrub_block *sblock_good,
-                                             int force_write);
+                                             struct scrub_block *sblock_good);
 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                                             struct scrub_block *sblock_good,
                                             int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_put_ctx(struct scrub_ctx *sctx);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
+        atomic_inc(&sctx->refs);
         atomic_inc(&sctx->bios_in_flight);
 }
 
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 {
         atomic_dec(&sctx->bios_in_flight);
         wake_up(&sctx->list_wait);
+        scrub_put_ctx(sctx);
 }
 
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 
+        atomic_inc(&sctx->refs);
         /*
          * increment scrubs_running to prevent cancel requests from
          * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
         atomic_dec(&sctx->workers_pending);
         wake_up(&fs_info->scrub_pause_wait);
         wake_up(&sctx->list_wait);
+        scrub_put_ctx(sctx);
 }
 
 static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
         kfree(sctx);
 }
 
+static void scrub_put_ctx(struct scrub_ctx *sctx)
+{
+        if (atomic_dec_and_test(&sctx->refs))
+                scrub_free_ctx(sctx);
+}
+
 static noinline_for_stack
 struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 {
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
         sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
         if (!sctx)
                 goto nomem;
+        atomic_set(&sctx->refs, 1);
         sctx->is_dev_replace = is_dev_replace;
         sctx->pages_per_rd_bio = pages_per_rd_bio;
         sctx->curr = -1;
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
         struct inode_fs_paths *ipath = NULL;
         struct btrfs_root *local_root;
         struct btrfs_key root_key;
+        struct btrfs_key key;
 
         root_key.objectid = root;
         root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
                 goto err;
         }
 
-        ret = inode_item_info(inum, 0, local_root, swarn->path);
+        /*
+         * this makes the path point to (inum INODE_ITEM ioff)
+         */
+        key.objectid = inum;
+        key.type = BTRFS_INODE_ITEM_KEY;
+        key.offset = 0;
+
+        ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
         if (ret) {
                 btrfs_release_path(swarn->path);
                 goto err;
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
 static inline void scrub_put_recover(struct scrub_recover *recover)
 {
         if (atomic_dec_and_test(&recover->refs)) {
-                kfree(recover->bbio);
-                kfree(recover->raid_map);
+                btrfs_put_bbio(recover->bbio);
                 kfree(recover);
         }
 }
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
         }
 
         /* setup the context, map the logical blocks and alloc the pages */
-        ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
-                                        logical, sblocks_for_recheck);
+        ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
         if (ret) {
                 spin_lock(&sctx->stat_lock);
                 sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
         if (!is_metadata && !have_csum) {
                 struct scrub_fixup_nodatasum *fixup_nodatasum;
 
-nodatasum_case:
                 WARN_ON(sctx->is_dev_replace);
 
+nodatasum_case:
+
                 /*
                  * !is_metadata and !have_csum, this means that the data
                  * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
                     sblock_other->no_io_error_seen) {
                         if (sctx->is_dev_replace) {
                                 scrub_write_block_to_dev_replace(sblock_other);
+                                goto corrected_error;
                         } else {
-                                int force_write = is_metadata || have_csum;
-
                                 ret = scrub_repair_block_from_good_copy(
-                                                sblock_bad, sblock_other,
-                                                force_write);
+                                                sblock_bad, sblock_other);
+                                if (!ret)
+                                        goto corrected_error;
                         }
-                        if (0 == ret)
-                                goto corrected_error;
                 }
         }
 
-        /*
-         * for dev_replace, pick good pages and write to the target device.
-         */
-        if (sctx->is_dev_replace) {
-                success = 1;
-                for (page_num = 0; page_num < sblock_bad->page_count;
-                     page_num++) {
-                        int sub_success;
-
-                        sub_success = 0;
-                        for (mirror_index = 0;
-                             mirror_index < BTRFS_MAX_MIRRORS &&
-                             sblocks_for_recheck[mirror_index].page_count > 0;
-                             mirror_index++) {
-                                struct scrub_block *sblock_other =
-                                        sblocks_for_recheck + mirror_index;
-                                struct scrub_page *page_other =
-                                        sblock_other->pagev[page_num];
-
-                                if (!page_other->io_error) {
-                                        ret = scrub_write_page_to_dev_replace(
-                                                        sblock_other, page_num);
-                                        if (ret == 0) {
-                                                /* succeeded for this page */
-                                                sub_success = 1;
-                                                break;
-                                        } else {
-                                                btrfs_dev_replace_stats_inc(
-                                                        &sctx->dev_root->
-                                                        fs_info->dev_replace.
-                                                        num_write_errors);
-                                        }
-                                }
-                        }
-
-                        if (!sub_success) {
-                                /*
-                                 * did not find a mirror to fetch the page
-                                 * from. scrub_write_page_to_dev_replace()
-                                 * handles this case (page->io_error), by
-                                 * filling the block with zeros before
-                                 * submitting the write request
-                                 */
-                                success = 0;
-                                ret = scrub_write_page_to_dev_replace(
-                                                sblock_bad, page_num);
-                                if (ret)
-                                        btrfs_dev_replace_stats_inc(
-                                                &sctx->dev_root->fs_info->
-                                                dev_replace.num_write_errors);
-                        }
-                }
-
-                goto out;
-        }
+        if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
+                goto did_not_correct_error;
 
         /*
-         * for regular scrub, repair those pages that are errored.
          * In case of I/O errors in the area that is supposed to be
          * repaired, continue by picking good copies of those pages.
          * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
          * mirror, even if other 512 byte sectors in the same PAGE_SIZE
          * area are unreadable.
          */
-
-        /* can only fix I/O errors from here on */
-        if (sblock_bad->no_io_error_seen)
-                goto did_not_correct_error;
-
         success = 1;
-        for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+        for (page_num = 0; page_num < sblock_bad->page_count;
+             page_num++) {
                 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+                struct scrub_block *sblock_other = NULL;
 
-                if (!page_bad->io_error)
+                /* skip no-io-error page in scrub */
+                if (!page_bad->io_error && !sctx->is_dev_replace)
                         continue;
 
-                for (mirror_index = 0;
-                     mirror_index < BTRFS_MAX_MIRRORS &&
-                     sblocks_for_recheck[mirror_index].page_count > 0;
-                     mirror_index++) {
-                        struct scrub_block *sblock_other = sblocks_for_recheck +
-                                                           mirror_index;
-                        struct scrub_page *page_other = sblock_other->pagev[
-                                                        page_num];
-
-                        if (!page_other->io_error) {
-                                ret = scrub_repair_page_from_good_copy(
-                                        sblock_bad, sblock_other, page_num, 0);
-                                if (0 == ret) {
-                                        page_bad->io_error = 0;
-                                        break; /* succeeded for this page */
+                /* try to find no-io-error page in mirrors */
+                if (page_bad->io_error) {
+                        for (mirror_index = 0;
+                             mirror_index < BTRFS_MAX_MIRRORS &&
+                             sblocks_for_recheck[mirror_index].page_count > 0;
+                             mirror_index++) {
+                                if (!sblocks_for_recheck[mirror_index].
+                                    pagev[page_num]->io_error) {
+                                        sblock_other = sblocks_for_recheck +
+                                                       mirror_index;
+                                        break;
                                 }
                         }
+                        if (!sblock_other)
+                                success = 0;
                 }
 
-                if (page_bad->io_error) {
-                        /* did not find a mirror to copy the page from */
-                        success = 0;
+                if (sctx->is_dev_replace) {
+                        /*
+                         * did not find a mirror to fetch the page
+                         * from. scrub_write_page_to_dev_replace()
+                         * handles this case (page->io_error), by
+                         * filling the block with zeros before
+                         * submitting the write request
+                         */
+                        if (!sblock_other)
+                                sblock_other = sblock_bad;
+
+                        if (scrub_write_page_to_dev_replace(sblock_other,
+                                                            page_num) != 0) {
+                                btrfs_dev_replace_stats_inc(
+                                        &sctx->dev_root->
+                                        fs_info->dev_replace.
+                                        num_write_errors);
+                                success = 0;
+                        }
+                } else if (sblock_other) {
+                        ret = scrub_repair_page_from_good_copy(sblock_bad,
+                                                               sblock_other,
+                                                               page_num, 0);
+                        if (0 == ret)
+                                page_bad->io_error = 0;
+                        else
+                                success = 0;
                 }
         }
 
-        if (success) {
+        if (success && !sctx->is_dev_replace) {
                 if (is_metadata || have_csum) {
                         /*
                          * need to verify the checksum now that all
@@ -1288,19 +1275,18 @@ out:
         return 0;
 }
 
-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
 {
-        if (raid_map) {
-                if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-                        return 3;
-                else
-                        return 2;
-        } else {
+        if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+                return 2;
+        else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+                return 3;
+        else
                 return (int)bbio->num_stripes;
-        }
 }
 
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
+                                                 u64 *raid_map,
                                                  u64 mapped_length,
                                                  int nstripes, int mirror,
                                                  int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
 {
         int i;
 
-        if (raid_map) {
+        if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                 /* RAID5/6 */
                 for (i = 0; i < nstripes; i++) {
                         if (raid_map[i] == RAID6_Q_STRIPE ||
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
         }
 }
 
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-                                     struct btrfs_fs_info *fs_info,
-                                     struct scrub_block *original_sblock,
-                                     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                                      struct scrub_block *sblocks_for_recheck)
 {
+        struct scrub_ctx *sctx = original_sblock->sctx;
+        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+        u64 length = original_sblock->page_count * PAGE_SIZE;
+        u64 logical = original_sblock->pagev[0]->logical;
         struct scrub_recover *recover;
         struct btrfs_bio *bbio;
-        u64 *raid_map;
         u64 sublen;
         u64 mapped_length;
         u64 stripe_offset;
         int stripe_index;
-        int page_index;
+        int page_index = 0;
         int mirror_index;
         int nmirrors;
         int ret;
 
         /*
-         * note: the two members ref_count and outstanding_pages
+         * note: the two members refs and outstanding_pages
          * are not used (and not set) in the blocks that are used for
          * the recheck procedure
          */
 
-        page_index = 0;
         while (length > 0) {
                 sublen = min_t(u64, length, PAGE_SIZE);
                 mapped_length = sublen;
                 bbio = NULL;
-                raid_map = NULL;
 
                 /*
                  * with a length of PAGE_SIZE, each returned stripe
                  * represents one mirror
                  */
                 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
-                                       &mapped_length, &bbio, 0, &raid_map);
+                                       &mapped_length, &bbio, 0, 1);
                 if (ret || !bbio || mapped_length < sublen) {
-                        kfree(bbio);
-                        kfree(raid_map);
+                        btrfs_put_bbio(bbio);
                         return -EIO;
                 }
 
                 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
                 if (!recover) {
-                        kfree(bbio);
-                        kfree(raid_map);
+                        btrfs_put_bbio(bbio);
                         return -ENOMEM;
                 }
 
                 atomic_set(&recover->refs, 1);
                 recover->bbio = bbio;
-                recover->raid_map = raid_map;
                 recover->map_length = mapped_length;
 
                 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
 
-                nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+                nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
+
                 for (mirror_index = 0; mirror_index < nmirrors;
                      mirror_index++) {
                         struct scrub_block *sblock;
                         struct scrub_page *page;
 
-                        if (mirror_index >= BTRFS_MAX_MIRRORS)
-                                continue;
-
                         sblock = sblocks_for_recheck + mirror_index;
                         sblock->sctx = sctx;
                         page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
                         sblock->pagev[page_index] = page;
                         page->logical = logical;
 
-                        scrub_stripe_index_and_offset(logical, raid_map,
+                        scrub_stripe_index_and_offset(logical,
+                                                      bbio->map_type,
+                                                      bbio->raid_map,
                                                       mapped_length,
-                                                      bbio->num_stripes,
+                                                      bbio->num_stripes -
+                                                      bbio->num_tgtdevs,
                                                       mirror_index,
                                                       &stripe_index,
                                                       &stripe_offset);
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
 
 static inline int scrub_is_page_on_raid56(struct scrub_page *page)
 {
-        return page->recover && page->recover->raid_map;
+        return page->recover &&
+               (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
 }
 
 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
         bio->bi_end_io = scrub_bio_wait_endio;
 
         ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
-                                    page->recover->raid_map,
                                     page->recover->map_length,
                                     page->mirror_num, 0);
         if (ret)
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 }
 
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-                                             struct scrub_block *sblock_good,
-                                             int force_write)
+                                             struct scrub_block *sblock_good)
 {
         int page_num;
         int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
 
                 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
                                                            sblock_good,
-                                                           page_num,
-                                                           force_write);
+                                                           page_num, 1);
                 if (ret_sub)
                         ret = ret_sub;
         }
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 
 static void scrub_block_get(struct scrub_block *sblock)
 {
-        atomic_inc(&sblock->ref_count);
+        atomic_inc(&sblock->refs);
 }
 
 static void scrub_block_put(struct scrub_block *sblock)
 {
-        if (atomic_dec_and_test(&sblock->ref_count)) {
+        if (atomic_dec_and_test(&sblock->refs)) {
                 int i;
 
                 if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
 
 static void scrub_page_get(struct scrub_page *spage)
 {
-        atomic_inc(&spage->ref_count);
+        atomic_inc(&spage->refs);
 }
 
 static void scrub_page_put(struct scrub_page *spage)
 {
-        if (atomic_dec_and_test(&spage->ref_count)) {
+        if (atomic_dec_and_test(&spage->refs)) {
                 if (spage->page)
                         __free_page(spage->page);
                 kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 
         /* one ref inside this function, plus one for each page added to
          * a bio later on */
-        atomic_set(&sblock->ref_count, 1);
+        atomic_set(&sblock->refs, 1);
         sblock->sctx = sctx;
         sblock->no_io_error_seen = 1;
 
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
 
         /* one ref inside this function, plus one for each page added to
          * a bio later on */
-        atomic_set(&sblock->ref_count, 1);
+        atomic_set(&sblock->refs, 1);
         sblock->sctx = sctx;
         sblock->no_io_error_seen = 1;
         sblock->sparity = sparity;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
         struct btrfs_raid_bio *rbio;
         struct scrub_page *spage;
         struct btrfs_bio *bbio = NULL;
-        u64 *raid_map = NULL;
         u64 length;
         int ret;
 
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
         length = sparity->logic_end - sparity->logic_start + 1;
         ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
                                sparity->logic_start,
-                               &length, &bbio, 0, &raid_map);
-        if (ret || !bbio || !raid_map)
+                               &length, &bbio, 0, 1);
+        if (ret || !bbio || !bbio->raid_map)
                 goto bbio_out;
 
         bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
         bio->bi_end_io = scrub_parity_bio_endio;
 
         rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
-                                              raid_map, length,
-                                              sparity->scrub_dev,
+                                              length, sparity->scrub_dev,
                                               sparity->dbitmap,
                                               sparity->nsectors);
         if (!rbio)
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 rbio_out:
         bio_put(bio);
 bbio_out:
-        kfree(bbio);
-        kfree(raid_map);
+        btrfs_put_bbio(bbio);
         bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
                   sparity->nsectors);
         spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
 
 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-        atomic_inc(&sparity->ref_count);
+        atomic_inc(&sparity->refs);
 }
 
 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-        if (!atomic_dec_and_test(&sparity->ref_count))
+        if (!atomic_dec_and_test(&sparity->refs))
                 return;
 
         scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
         sparity->scrub_dev = sdev;
         sparity->logic_start = logic_start;
         sparity->logic_end = logic_end;
-        atomic_set(&sparity->ref_count, 1);
+        atomic_set(&sparity->refs, 1);
         INIT_LIST_HEAD(&sparity->spages);
         sparity->dbitmap = sparity->bitmap;
         sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                 increment = map->stripe_len;
                 mirror_num = num % map->num_stripes + 1;
-        } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                                BTRFS_BLOCK_GROUP_RAID6)) {
+        } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                 get_raid56_logic_offset(physical, num, map, &offset, NULL);
                 increment = map->stripe_len * nr_data_stripes(map);
                 mirror_num = 1;
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
          */
         logical = base + offset;
         physical_end = physical + nstripes * map->stripe_len;
-        if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                         BTRFS_BLOCK_GROUP_RAID6)) {
+        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                 get_raid56_logic_offset(physical_end, num,
                                         map, &logic_end, NULL);
                 logic_end += base;
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         ret = 0;
         while (physical < physical_end) {
                 /* for raid56, we skip parity stripe */
-                if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                                 BTRFS_BLOCK_GROUP_RAID6)) {
+                if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                         ret = get_raid56_logic_offset(physical, num,
                                                       map, &logical, &stripe_logical);
                         logical += base;
@@ -3280,8 +3254,7 @@ again:
                         scrub_free_csums(sctx);
                         if (extent_logical + extent_len <
                             key.objectid + bytes) {
-                                if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                                                 BTRFS_BLOCK_GROUP_RAID6)) {
+                                if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                                         /*
                                          * loop until we find next data stripe
                                          * or we have finished all stripes.
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
         scrub_workers_put(fs_info);
         mutex_unlock(&fs_info->scrub_lock);
 
-        scrub_free_ctx(sctx);
+        scrub_put_ctx(sctx);
 
         return ret;
 }
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
                               &mapped_length, &bbio, 0);
         if (ret || !bbio || mapped_length < extent_len ||
             !bbio->stripes[0].dev->bdev) {
-                kfree(bbio);
+                btrfs_put_bbio(bbio);
                 return;
         }
 
         *extent_physical = bbio->stripes[0].physical;
         *extent_mirror_num = bbio->mirror_num;
         *extent_dev = bbio->stripes[0].dev;
-        kfree(bbio);
+        btrfs_put_bbio(bbio);
 }
 
 static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
|