7 år sedan · 5cea7647e6
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -91,3 +91,14 @@ config BTRFS_ASSERT
 
				 	  any of the assertions trip.  This is meant for btrfs developers only.
			
 
				 
			
 
				 	  If unsure, say N.
			
 
				+
			
 
				+config BTRFS_FS_REF_VERIFY
			
 
				+	bool "Btrfs with the ref verify tool compiled in"
			
 
				+	depends on BTRFS_FS
			
 
				+	default n
			
 
				+	help
			
 
				+	  Enable run-time extent reference verification instrumentation.  This
			
 
				+	  is meant to be used by btrfs developers for tracking down extent
			
 
				+	  reference problems or verifying they didn't break something.
			
 
				+
			
 
				+	  If unsure, say N.
			
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 
				 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
			
 
				 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
			
 
				 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
			
 
				-	   uuid-tree.o props.o hash.o free-space-tree.o
			
 
				+	   uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
			
 
				 
			
 
				 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
			
 
				 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
			
 
				+btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
			
 
				 
			
 
				 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
			
 
				 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
			
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -67,7 +67,7 @@ struct btrfs_workqueue {
 
				 static void normal_work_helper(struct btrfs_work *work);
			
 
				 
			
 
				 #define BTRFS_WORK_HELPER(name)					\
			
 
				-void btrfs_##name(struct work_struct *arg)				\
			
 
				+noinline_for_stack void btrfs_##name(struct work_struct *arg)		\
			
 
				 {									\
			
 
				 	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
			
 
				 					       normal_work);		\
			
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
 
				 			      const struct extent_buffer *eb,
			
 
				 			      const struct btrfs_file_extent_item *fi,
			
 
				 			      u64 extent_item_pos,
			
 
				-			      struct extent_inode_elem **eie)
			
 
				+			      struct extent_inode_elem **eie,
			
 
				+			      bool ignore_offset)
			
 
				 {
			
 
				 	u64 offset = 0;
			
 
				 	struct extent_inode_elem *e;
			
 
				 
			
 
				-	if (!btrfs_file_extent_compression(eb, fi) &&
			
 
				+	if (!ignore_offset &&
			
 
				+	    !btrfs_file_extent_compression(eb, fi) &&
			
 
				 	    !btrfs_file_extent_encryption(eb, fi) &&
			
 
				 	    !btrfs_file_extent_other_encoding(eb, fi)) {
			
 
				 		u64 data_offset;
			
@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
 
				 
			
 
				 static int find_extent_in_eb(const struct extent_buffer *eb,
			
 
				 			     u64 wanted_disk_byte, u64 extent_item_pos,
			
 
				-			     struct extent_inode_elem **eie)
			
 
				+			     struct extent_inode_elem **eie,
			
 
				+			     bool ignore_offset)
			
 
				 {
			
 
				 	u64 disk_byte;
			
 
				 	struct btrfs_key key;
			
@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
 
				 		if (disk_byte != wanted_disk_byte)
			
 
				 			continue;
			
 
				 
			
 
				-		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
			
 
				+		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
			
 
				 		if (ret < 0)
			
 
				 			return ret;
			
 
				 	}
			
@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
 
				 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			
 
				 			   struct ulist *parents, struct prelim_ref *ref,
			
 
				 			   int level, u64 time_seq, const u64 *extent_item_pos,
			
 
				-			   u64 total_refs)
			
 
				+			   u64 total_refs, bool ignore_offset)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int slot;
			
@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 
				 			if (extent_item_pos) {
			
 
				 				ret = check_extent_in_eb(&key, eb, fi,
			
 
				 						*extent_item_pos,
			
 
				-						&eie);
			
 
				+						&eie, ignore_offset);
			
 
				 				if (ret < 0)
			
 
				 					break;
			
 
				 			}
			
@@ -510,7 +513,8 @@ next:
 
				 static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
			
 
				 				struct btrfs_path *path, u64 time_seq,
			
 
				 				struct prelim_ref *ref, struct ulist *parents,
			
 
				-				const u64 *extent_item_pos, u64 total_refs)
			
 
				+				const u64 *extent_item_pos, u64 total_refs,
			
 
				+				bool ignore_offset)
			
 
				 {
			
 
				 	struct btrfs_root *root;
			
 
				 	struct btrfs_key root_key;
			
@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
				 	}
			
 
				 
			
 
				 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
			
 
				-			      extent_item_pos, total_refs);
			
 
				+			      extent_item_pos, total_refs, ignore_offset);
			
 
				 out:
			
 
				 	path->lowest_level = 0;
			
 
				 	btrfs_release_path(path);
			
@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 
				 				 struct btrfs_path *path, u64 time_seq,
			
 
				 				 struct preftrees *preftrees,
			
 
				 				 const u64 *extent_item_pos, u64 total_refs,
			
 
				-				 struct share_check *sc)
			
 
				+				 struct share_check *sc, bool ignore_offset)
			
 
				 {
			
 
				 	int err;
			
 
				 	int ret = 0;
			
@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 
				 		}
			
 
				 		err = resolve_indirect_ref(fs_info, path, time_seq, ref,
			
 
				 					   parents, extent_item_pos,
			
 
				-					   total_refs);
			
 
				+					   total_refs, ignore_offset);
			
 
				 		/*
			
 
				 		 * we can only tolerate ENOENT,otherwise,we should catch error
			
 
				 		 * and return directly.
			
@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 
				 	struct btrfs_key key;
			
 
				 	struct btrfs_key tmp_op_key;
			
 
				 	struct btrfs_key *op_key = NULL;
			
 
				+	struct rb_node *n;
			
 
				 	int count;
			
 
				 	int ret = 0;
			
 
				 
			
@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 
				 	}
			
 
				 
			
 
				 	spin_lock(&head->lock);
			
 
				-	list_for_each_entry(node, &head->ref_list, list) {
			
 
				+	for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
			
 
				+		node = rb_entry(n, struct btrfs_delayed_ref_node,
			
 
				+				ref_node);
			
 
				 		if (node->seq > seq)
			
 
				 			continue;
			
 
				 
			
@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
 
				  *
			
 
				  * Otherwise this returns 0 for success and <0 for an error.
			
 
				  *
			
 
				+ * If ignore_offset is set to false, only extent refs whose offsets match
			
 
				+ * extent_item_pos are returned.  If true, every extent ref is returned
			
 
				+ * and extent_item_pos is ignored.
			
 
				+ *
			
 
				  * FIXME some caching might speed things up
			
 
				  */
			
 
				 static int find_parent_nodes(struct btrfs_trans_handle *trans,
			
 
				 			     struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				 			     u64 time_seq, struct ulist *refs,
			
 
				 			     struct ulist *roots, const u64 *extent_item_pos,
			
 
				-			     struct share_check *sc)
			
 
				+			     struct share_check *sc, bool ignore_offset)
			
 
				 {
			
 
				 	struct btrfs_key key;
			
 
				 	struct btrfs_path *path;
			
@@ -1178,7 +1189,7 @@ again:
 
				 		head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
			
 
				 		if (head) {
			
 
				 			if (!mutex_trylock(&head->mutex)) {
			
 
				-				refcount_inc(&head->node.refs);
			
 
				+				refcount_inc(&head->refs);
			
 
				 				spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 				btrfs_release_path(path);
			
@@ -1189,7 +1200,7 @@ again:
 
				 				 */
			
 
				 				mutex_lock(&head->mutex);
			
 
				 				mutex_unlock(&head->mutex);
			
 
				-				btrfs_put_delayed_ref(&head->node);
			
 
				+				btrfs_put_delayed_ref_head(head);
			
 
				 				goto again;
			
 
				 			}
			
 
				 			spin_unlock(&delayed_refs->lock);
			
@@ -1235,7 +1246,7 @@ again:
 
				 	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
			
 
				 
			
 
				 	ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
			
 
				-				    extent_item_pos, total_refs, sc);
			
 
				+				    extent_item_pos, total_refs, sc, ignore_offset);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
@@ -1282,7 +1293,7 @@ again:
 
				 				btrfs_tree_read_lock(eb);
			
 
				 				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			
 
				 				ret = find_extent_in_eb(eb, bytenr,
			
 
				-							*extent_item_pos, &eie);
			
 
				+							*extent_item_pos, &eie, ignore_offset);
			
 
				 				btrfs_tree_read_unlock_blocking(eb);
			
 
				 				free_extent_buffer(eb);
			
 
				 				if (ret < 0)
			
@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
 
				 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
			
 
				 				struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				 				u64 time_seq, struct ulist **leafs,
			
 
				-				const u64 *extent_item_pos)
			
 
				+				const u64 *extent_item_pos, bool ignore_offset)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 
				 		return -ENOMEM;
			
 
				 
			
 
				 	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
			
 
				-				*leafs, NULL, extent_item_pos, NULL);
			
 
				+				*leafs, NULL, extent_item_pos, NULL, ignore_offset);
			
 
				 	if (ret < 0 && ret != -ENOENT) {
			
 
				 		free_leaf_list(*leafs);
			
 
				 		return ret;
			
@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 
				  */
			
 
				 static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
			
 
				 				     struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				-				     u64 time_seq, struct ulist **roots)
			
 
				+				     u64 time_seq, struct ulist **roots,
			
 
				+				     bool ignore_offset)
			
 
				 {
			
 
				 	struct ulist *tmp;
			
 
				 	struct ulist_node *node = NULL;
			
@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 
				 	ULIST_ITER_INIT(&uiter);
			
 
				 	while (1) {
			
 
				 		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
			
 
				-					tmp, *roots, NULL, NULL);
			
 
				+					tmp, *roots, NULL, NULL, ignore_offset);
			
 
				 		if (ret < 0 && ret != -ENOENT) {
			
 
				 			ulist_free(tmp);
			
 
				 			ulist_free(*roots);
			
@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 
				 
			
 
				 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			
 
				 			 struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				-			 u64 time_seq, struct ulist **roots)
			
 
				+			 u64 time_seq, struct ulist **roots,
			
 
				+			 bool ignore_offset)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				 	if (!trans)
			
 
				 		down_read(&fs_info->commit_root_sem);
			
 
				 	ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
			
 
				-					time_seq, roots);
			
 
				+					time_seq, roots, ignore_offset);
			
 
				 	if (!trans)
			
 
				 		up_read(&fs_info->commit_root_sem);
			
 
				 	return ret;
			
@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
 
				 	ULIST_ITER_INIT(&uiter);
			
 
				 	while (1) {
			
 
				 		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
			
 
				-					roots, NULL, &shared);
			
 
				+					roots, NULL, &shared, false);
			
 
				 		if (ret == BACKREF_FOUND_SHARED) {
			
 
				 			/* this is the only condition under which we return 1 */
			
 
				 			ret = 1;
			
@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
 
				 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
			
 
				 				u64 extent_item_objectid, u64 extent_item_pos,
			
 
				 				int search_commit_root,
			
 
				-				iterate_extent_inodes_t *iterate, void *ctx)
			
 
				+				iterate_extent_inodes_t *iterate, void *ctx,
			
 
				+				bool ignore_offset)
			
 
				 {
			
 
				 	int ret;
			
 
				 	struct btrfs_trans_handle *trans = NULL;
			
@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
				 
			
 
				 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
			
 
				 				   tree_mod_seq_elem.seq, &refs,
			
 
				-				   &extent_item_pos);
			
 
				+				   &extent_item_pos, ignore_offset);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
 
				 	ULIST_ITER_INIT(&ref_uiter);
			
 
				 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
			
 
				 		ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
			
 
				-						tree_mod_seq_elem.seq, &roots);
			
 
				+						tree_mod_seq_elem.seq, &roots,
			
 
				+						ignore_offset);
			
 
				 		if (ret)
			
 
				 			break;
			
 
				 		ULIST_ITER_INIT(&root_uiter);
			
@@ -1943,7 +1958,8 @@ out:
 
				 
			
 
				 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
			
 
				 				struct btrfs_path *path,
			
 
				-				iterate_extent_inodes_t *iterate, void *ctx)
			
 
				+				iterate_extent_inodes_t *iterate, void *ctx,
			
 
				+				bool ignore_offset)
			
 
				 {
			
 
				 	int ret;
			
 
				 	u64 extent_item_pos;
			
@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 
				 	extent_item_pos = logical - found_key.objectid;
			
 
				 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
			
 
				 					extent_item_pos, search_commit_root,
			
 
				-					iterate, ctx);
			
 
				+					iterate, ctx, ignore_offset);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 
				 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
			
 
				 				u64 extent_item_objectid,
			
 
				 				u64 extent_offset, int search_commit_root,
			
 
				-				iterate_extent_inodes_t *iterate, void *ctx);
			
 
				+				iterate_extent_inodes_t *iterate, void *ctx,
			
 
				+				bool ignore_offset);
			
 
				 
			
 
				 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
			
 
				 				struct btrfs_path *path,
			
 
				-				iterate_extent_inodes_t *iterate, void *ctx);
			
 
				+				iterate_extent_inodes_t *iterate, void *ctx,
			
 
				+				bool ignore_offset);
			
 
				 
			
 
				 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
			
 
				 
			
 
				 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			
 
				 			 struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				-			 u64 time_seq, struct ulist **roots);
			
 
				+			 u64 time_seq, struct ulist **roots, bool ignore_offset);
			
 
				 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			
 
				 			u32 name_len, unsigned long name_off,
			
 
				 			struct extent_buffer *eb_in, u64 parent,
			
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -36,14 +36,13 @@
 
				 #define BTRFS_INODE_ORPHAN_META_RESERVED	1
			
 
				 #define BTRFS_INODE_DUMMY			2
			
 
				 #define BTRFS_INODE_IN_DEFRAG			3
			
 
				-#define BTRFS_INODE_DELALLOC_META_RESERVED	4
			
 
				-#define BTRFS_INODE_HAS_ORPHAN_ITEM		5
			
 
				-#define BTRFS_INODE_HAS_ASYNC_EXTENT		6
			
 
				-#define BTRFS_INODE_NEEDS_FULL_SYNC		7
			
 
				-#define BTRFS_INODE_COPY_EVERYTHING		8
			
 
				-#define BTRFS_INODE_IN_DELALLOC_LIST		9
			
 
				-#define BTRFS_INODE_READDIO_NEED_LOCK		10
			
 
				-#define BTRFS_INODE_HAS_PROPS		        11
			
 
				+#define BTRFS_INODE_HAS_ORPHAN_ITEM		4
			
 
				+#define BTRFS_INODE_HAS_ASYNC_EXTENT		5
			
 
				+#define BTRFS_INODE_NEEDS_FULL_SYNC		6
			
 
				+#define BTRFS_INODE_COPY_EVERYTHING		7
			
 
				+#define BTRFS_INODE_IN_DELALLOC_LIST		8
			
 
				+#define BTRFS_INODE_READDIO_NEED_LOCK		9
			
 
				+#define BTRFS_INODE_HAS_PROPS		        10
			
 
				 
			
 
				 /* in memory btrfs inode */
			
 
				 struct btrfs_inode {
			
@@ -176,7 +175,8 @@ struct btrfs_inode {
 
				 	 * of extent items we've reserved metadata for.
			
 
				 	 */
			
 
				 	unsigned outstanding_extents;
			
 
				-	unsigned reserved_extents;
			
 
				+
			
 
				+	struct btrfs_block_rsv block_rsv;
			
 
				 
			
 
				 	/*
			
 
				 	 * Cached values of inode properties
			
@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
 
				 	return false;
			
 
				 }
			
 
				 
			
 
				+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
			
 
				+						 int mod)
			
 
				+{
			
 
				+	lockdep_assert_held(&inode->lock);
			
 
				+	inode->outstanding_extents += mod;
			
 
				+	if (btrfs_is_free_space_inode(inode))
			
 
				+		return;
			
 
				+	trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
			
 
				+						  mod);
			
 
				+}
			
 
				+
			
 
				 static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
			
 
				 {
			
 
				 	int ret = 0;
			
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
 
				 		struct btrfsic_dev_state_hashtable *h)
			
 
				 {
			
 
				 	const unsigned int hashval =
			
 
				-	    (((unsigned int)((uintptr_t)ds->bdev)) &
			
 
				+	    (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
			
 
				 	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
			
 
				 
			
 
				 	list_add(&ds->collision_resolving_node, h->table + hashval);
			
@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 
				 	mutex_lock(&btrfsic_mutex);
			
 
				 	/* since btrfsic_submit_bio() is also called before
			
 
				 	 * btrfsic_mount(), this might return NULL */
			
 
				-	dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
			
 
				+	dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
			
 
				 	if (NULL != dev_state &&
			
 
				 	    (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
			
 
				 		unsigned int i = 0;
			
@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 
				 	state = kvzalloc(sizeof(*state), GFP_KERNEL);
			
 
				 	if (!state) {
			
 
				 		pr_info("btrfs check-integrity: allocation failed!\n");
			
 
				-		return -1;
			
 
				+		return -ENOMEM;
			
 
				 	}
			
 
				 
			
 
				 	if (!btrfsic_is_initialized) {
			
@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 
				 		if (NULL == ds) {
			
 
				 			pr_info("btrfs check-integrity: kmalloc() failed!\n");
			
 
				 			mutex_unlock(&btrfsic_mutex);
			
 
				-			return -1;
			
 
				+			return -ENOMEM;
			
 
				 		}
			
 
				 		ds->bdev = device->bdev;
			
 
				 		ds->state = state;
			
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,6 +33,8 @@
 
				 #include <linux/bit_spinlock.h>
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/sched/mm.h>
			
 
				+#include <linux/sort.h>
			
 
				+#include <linux/log2.h>
			
 
				 #include "ctree.h"
			
 
				 #include "disk-io.h"
			
 
				 #include "transaction.h"
			
@@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio)
 
				 					 cb->start,
			
 
				 					 cb->start + cb->len - 1,
			
 
				 					 NULL,
			
 
				-					 bio->bi_status ? 0 : 1);
			
 
				+					 bio->bi_status ?
			
 
				+					 BLK_STS_OK : BLK_STS_NOTSUPP);
			
 
				 	cb->compressed_pages[0]->mapping = NULL;
			
 
				 
			
 
				 	end_compressed_writeback(inode, cb);
			
@@ -706,7 +709,86 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static struct {
			
 
				+/*
			
 
				+ * Heuristic uses systematic sampling to collect data from the input data
			
 
				+ * range, the logic can be tuned by the following constants:
			
 
				+ *
			
 
				+ * @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
			
 
				+ * @SAMPLING_INTERVAL  - range from which the sampled data can be collected
			
 
				+ */
			
 
				+#define SAMPLING_READ_SIZE	(16)
			
 
				+#define SAMPLING_INTERVAL	(256)
			
 
				+
			
 
				+/*
			
 
				+ * For statistical analysis of the input data we consider bytes that form a
			
 
				+ * Galois Field of 256 objects. Each object has an attribute count, ie. how
			
 
				+ * many times the object appeared in the sample.
			
 
				+ */
			
 
				+#define BUCKET_SIZE		(256)
			
 
				+
			
 
				+/*
			
 
				+ * The size of the sample is based on a statistical sampling rule of thumb.
			
 
				+ * The common way is to perform sampling tests as long as the number of
			
 
				+ * elements in each cell is at least 5.
			
 
				+ *
			
 
				+ * Instead of 5, we choose 32 to obtain more accurate results.
			
 
				+ * If the data contain the maximum number of symbols, which is 256, we obtain a
			
 
				+ * sample size bound by 8192.
			
 
				+ *
			
 
				+ * For a sample of at most 8KB of data per data range: 16 consecutive bytes
			
 
				+ * from up to 512 locations.
			
 
				+ */
			
 
				+#define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED *		\
			
 
				+				 SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
			
 
				+
			
 
				+struct bucket_item {
			
 
				+	u32 count;
			
 
				+};
			
 
				+
			
 
				+struct heuristic_ws {
			
 
				+	/* Partial copy of input data */
			
 
				+	u8 *sample;
			
 
				+	u32 sample_size;
			
 
				+	/* Buckets store counters for each byte value */
			
 
				+	struct bucket_item *bucket;
			
 
				+	struct list_head list;
			
 
				+};
			
 
				+
			
 
				+static void free_heuristic_ws(struct list_head *ws)
			
 
				+{
			
 
				+	struct heuristic_ws *workspace;
			
 
				+
			
 
				+	workspace = list_entry(ws, struct heuristic_ws, list);
			
 
				+
			
 
				+	kvfree(workspace->sample);
			
 
				+	kfree(workspace->bucket);
			
 
				+	kfree(workspace);
			
 
				+}
			
 
				+
			
 
				+static struct list_head *alloc_heuristic_ws(void)
			
 
				+{
			
 
				+	struct heuristic_ws *ws;
			
 
				+
			
 
				+	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
			
 
				+	if (!ws)
			
 
				+		return ERR_PTR(-ENOMEM);
			
 
				+
			
 
				+	ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
			
 
				+	if (!ws->sample)
			
 
				+		goto fail;
			
 
				+
			
 
				+	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
			
 
				+	if (!ws->bucket)
			
 
				+		goto fail;
			
 
				+
			
 
				+	INIT_LIST_HEAD(&ws->list);
			
 
				+	return &ws->list;
			
 
				+fail:
			
 
				+	free_heuristic_ws(&ws->list);
			
 
				+	return ERR_PTR(-ENOMEM);
			
 
				+}
			
 
				+
			
 
				+struct workspaces_list {
			
 
				 	struct list_head idle_ws;
			
 
				 	spinlock_t ws_lock;
			
 
				 	/* Number of free workspaces */
			
@@ -715,7 +797,11 @@ static struct {
 
				 	atomic_t total_ws;
			
 
				 	/* Waiters for a free workspace */
			
 
				 	wait_queue_head_t ws_wait;
			
 
				-} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
			
 
				+};
			
 
				+
			
 
				+static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
			
 
				+
			
 
				+static struct workspaces_list btrfs_heuristic_ws;
			
 
				 
			
 
				 static const struct btrfs_compress_op * const btrfs_compress_op[] = {
			
 
				 	&btrfs_zlib_compress,
			
@@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 
				 
			
 
				 void __init btrfs_init_compress(void)
			
 
				 {
			
 
				+	struct list_head *workspace;
			
 
				 	int i;
			
 
				 
			
 
				-	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
			
 
				-		struct list_head *workspace;
			
 
				+	INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
			
 
				+	spin_lock_init(&btrfs_heuristic_ws.ws_lock);
			
 
				+	atomic_set(&btrfs_heuristic_ws.total_ws, 0);
			
 
				+	init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
			
 
				+
			
 
				+	workspace = alloc_heuristic_ws();
			
 
				+	if (IS_ERR(workspace)) {
			
 
				+		pr_warn(
			
 
				+	"BTRFS: cannot preallocate heuristic workspace, will try later\n");
			
 
				+	} else {
			
 
				+		atomic_set(&btrfs_heuristic_ws.total_ws, 1);
			
 
				+		btrfs_heuristic_ws.free_ws = 1;
			
 
				+		list_add(workspace, &btrfs_heuristic_ws.idle_ws);
			
 
				+	}
			
 
				 
			
 
				+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
			
 
				 		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
			
 
				 		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
			
 
				 		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
			
@@ -756,18 +856,32 @@ void __init btrfs_init_compress(void)
 
				  * Preallocation makes a forward progress guarantees and we do not return
			
 
				  * errors.
			
 
				  */
			
 
				-static struct list_head *find_workspace(int type)
			
 
				+static struct list_head *__find_workspace(int type, bool heuristic)
			
 
				 {
			
 
				 	struct list_head *workspace;
			
 
				 	int cpus = num_online_cpus();
			
 
				 	int idx = type - 1;
			
 
				 	unsigned nofs_flag;
			
 
				+	struct list_head *idle_ws;
			
 
				+	spinlock_t *ws_lock;
			
 
				+	atomic_t *total_ws;
			
 
				+	wait_queue_head_t *ws_wait;
			
 
				+	int *free_ws;
			
 
				+
			
 
				+	if (heuristic) {
			
 
				+		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
			
 
				+		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
			
 
				+		total_ws = &btrfs_heuristic_ws.total_ws;
			
 
				+		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
			
 
				+		free_ws	 = &btrfs_heuristic_ws.free_ws;
			
 
				+	} else {
			
 
				+		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
			
 
				+		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
			
 
				+		total_ws = &btrfs_comp_ws[idx].total_ws;
			
 
				+		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
			
 
				+		free_ws	 = &btrfs_comp_ws[idx].free_ws;
			
 
				+	}
			
 
				 
			
 
				-	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
			
 
				-	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
			
 
				-	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
			
 
				-	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
			
 
				-	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
			
 
				 again:
			
 
				 	spin_lock(ws_lock);
			
 
				 	if (!list_empty(idle_ws)) {
			
@@ -797,7 +911,10 @@ again:
 
				 	 * context of btrfs_compress_bio/btrfs_compress_pages
			
 
				 	 */
			
 
				 	nofs_flag = memalloc_nofs_save();
			
 
				-	workspace = btrfs_compress_op[idx]->alloc_workspace();
			
 
				+	if (heuristic)
			
 
				+		workspace = alloc_heuristic_ws();
			
 
				+	else
			
 
				+		workspace = btrfs_compress_op[idx]->alloc_workspace();
			
 
				 	memalloc_nofs_restore(nofs_flag);
			
 
				 
			
 
				 	if (IS_ERR(workspace)) {
			
@@ -828,18 +945,38 @@ again:
 
				 	return workspace;
			
 
				 }
			
 
				 
			
 
				+static struct list_head *find_workspace(int type)
			
 
				+{
			
 
				+	return __find_workspace(type, false);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * put a workspace struct back on the list or free it if we have enough
			
 
				  * idle ones sitting around
			
 
				  */
			
 
				-static void free_workspace(int type, struct list_head *workspace)
			
 
				+static void __free_workspace(int type, struct list_head *workspace,
			
 
				+			     bool heuristic)
			
 
				 {
			
 
				 	int idx = type - 1;
			
 
				-	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
			
 
				-	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
			
 
				-	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
			
 
				-	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
			
 
				-	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
			
 
				+	struct list_head *idle_ws;
			
 
				+	spinlock_t *ws_lock;
			
 
				+	atomic_t *total_ws;
			
 
				+	wait_queue_head_t *ws_wait;
			
 
				+	int *free_ws;
			
 
				+
			
 
				+	if (heuristic) {
			
 
				+		idle_ws	 = &btrfs_heuristic_ws.idle_ws;
			
 
				+		ws_lock	 = &btrfs_heuristic_ws.ws_lock;
			
 
				+		total_ws = &btrfs_heuristic_ws.total_ws;
			
 
				+		ws_wait	 = &btrfs_heuristic_ws.ws_wait;
			
 
				+		free_ws	 = &btrfs_heuristic_ws.free_ws;
			
 
				+	} else {
			
 
				+		idle_ws	 = &btrfs_comp_ws[idx].idle_ws;
			
 
				+		ws_lock	 = &btrfs_comp_ws[idx].ws_lock;
			
 
				+		total_ws = &btrfs_comp_ws[idx].total_ws;
			
 
				+		ws_wait	 = &btrfs_comp_ws[idx].ws_wait;
			
 
				+		free_ws	 = &btrfs_comp_ws[idx].free_ws;
			
 
				+	}
			
 
				 
			
 
				 	spin_lock(ws_lock);
			
 
				 	if (*free_ws <= num_online_cpus()) {
			
@@ -850,7 +987,10 @@ static void free_workspace(int type, struct list_head *workspace)
 
				 	}
			
 
				 	spin_unlock(ws_lock);
			
 
				 
			
 
				-	btrfs_compress_op[idx]->free_workspace(workspace);
			
 
				+	if (heuristic)
			
 
				+		free_heuristic_ws(workspace);
			
 
				+	else
			
 
				+		btrfs_compress_op[idx]->free_workspace(workspace);
			
 
				 	atomic_dec(total_ws);
			
 
				 wake:
			
 
				 	/*
			
@@ -861,6 +1001,11 @@ wake:
 
				 		wake_up(ws_wait);
			
 
				 }
			
 
				 
			
 
				+static void free_workspace(int type, struct list_head *ws)
			
 
				+{
			
 
				+	return __free_workspace(type, ws, false);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * cleanup function for module exit
			
 
				  */
			
@@ -869,6 +1014,13 @@ static void free_workspaces(void)
 
				 	struct list_head *workspace;
			
 
				 	int i;
			
 
				 
			
 
				+	while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
			
 
				+		workspace = btrfs_heuristic_ws.idle_ws.next;
			
 
				+		list_del(workspace);
			
 
				+		free_heuristic_ws(workspace);
			
 
				+		atomic_dec(&btrfs_heuristic_ws.total_ws);
			
 
				+	}
			
 
				+
			
 
				 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
			
 
				 		while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
			
 
				 			workspace = btrfs_comp_ws[i].idle_ws.next;
			
@@ -883,6 +1035,11 @@ static void free_workspaces(void)
 
				  * Given an address space and start and length, compress the bytes into @pages
			
 
				  * that are allocated on demand.
			
 
				  *
			
 
				+ * @type_level encodes the algorithm and level, where level 0 means whatever
			
 
				+ * default the algorithm chooses and is opaque here;
			
 
				+ * - the compression algorithm is in bits 0-3
			
 
				+ * - the level is in bits 4-7
			
 
				+ *
			
 
				  * @out_pages is an in/out parameter, holds maximum number of pages to allocate
			
 
				  * and returns number of actually allocated pages
			
 
				  *
			
@@ -897,7 +1054,7 @@ static void free_workspaces(void)
 
				  * @max_out tells us the max number of bytes that we're allowed to
			
 
				  * stuff into pages
			
 
				  */
			
 
				-int btrfs_compress_pages(int type, struct address_space *mapping,
			
 
				+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
			
 
				 			 u64 start, struct page **pages,
			
 
				 			 unsigned long *out_pages,
			
 
				 			 unsigned long *total_in,
			
@@ -905,9 +1062,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 
				 {
			
 
				 	struct list_head *workspace;
			
 
				 	int ret;
			
 
				+	int type = type_level & 0xF;
			
 
				 
			
 
				 	workspace = find_workspace(type);
			
 
				 
			
 
				+	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
			
 
				 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
			
 
				 						      start, pages,
			
 
				 						      out_pages,
			
@@ -1065,6 +1224,211 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Shannon Entropy calculation
			
 
				+ *
			
 
				+ * Pure byte distribution analysis fails to determine compressibility of data.
			
 
				+ * Try calculating entropy to estimate the average minimum number of bits
			
 
				+ * needed to encode the sampled data.
			
 
				+ *
			
 
				+ * For convenience, return the percentage of needed bits, instead of amount of
			
 
				+ * bits directly.
			
 
				+ *
			
 
				+ * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy
			
 
				+ *			    and can be compressible with high probability
			
 
				+ *
			
 
				+ * @ENTROPY_LVL_HIGH - data are not compressible with high probability
			
 
				+ *
			
 
				+ * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
			
 
				+ */
			
 
				+#define ENTROPY_LVL_ACEPTABLE		(65)
			
 
				+#define ENTROPY_LVL_HIGH		(80)
			
 
				+
			
 
				+/*
			
 
				+ * For increased precision in shannon_entropy calculation,
			
 
				+ * let's do pow(n, M) to save more digits after the decimal point:
			
 
				+ *
			
 
				+ * - maximum int bit length is 64
			
 
				+ * - ilog2(MAX_SAMPLE_SIZE)	-> 13
			
 
				+ * - 13 * 4 = 52 < 64		-> M = 4
			
 
				+ *
			
 
				+ * So use pow(n, 4).
			
 
				+ */
			
 
				+static inline u32 ilog2_w(u64 n)
			
 
				+{
			
 
				+	return ilog2(n * n * n * n);
			
 
				+}
			
 
				+
			
 
				+static u32 shannon_entropy(struct heuristic_ws *ws)
			
 
				+{
			
 
				+	const u32 entropy_max = 8 * ilog2_w(2);
			
 
				+	u32 entropy_sum = 0;
			
 
				+	u32 p, p_base, sz_base;
			
 
				+	u32 i;
			
 
				+
			
 
				+	sz_base = ilog2_w(ws->sample_size);
			
 
				+	for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
			
 
				+		p = ws->bucket[i].count;
			
 
				+		p_base = ilog2_w(p);
			
 
				+		entropy_sum += p * (sz_base - p_base);
			
 
				+	}
			
 
				+
			
 
				+	entropy_sum /= ws->sample_size;
			
 
				+	return entropy_sum * 100 / entropy_max;
			
 
				+}
			
 
				+
			
 
				+/* Compare buckets by count, descending (larger counts sort first) */
			
 
				+static int bucket_comp_rev(const void *lv, const void *rv)
			
 
				+{
			
 
				+	const struct bucket_item *l = (const struct bucket_item *)lv;
			
 
				+	const struct bucket_item *r = (const struct bucket_item *)rv;
			
 
				+
			
 
				+	return r->count - l->count;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Size of the core byte set - how many bytes cover 90% of the sample
			
 
				+ *
			
 
				+ * There are several types of structured binary data that use nearly all byte
			
 
				+ * values. The distribution can be uniform and counts in all buckets will be
			
 
				+ * nearly the same (eg. encrypted data). Unlikely to be compressible.
			
 
				+ *
			
 
				+ * Other possibility is normal (Gaussian) distribution, where the data could
			
 
				+ * be potentially compressible, but we have to take a few more steps to decide
			
 
				+ * how much.
			
 
				+ *
			
 
				+ * @BYTE_CORE_SET_LOW  - main part of byte values repeated frequently,
			
 
				+ *                       compression algo can easily fix that
			
 
				+ * @BYTE_CORE_SET_HIGH - data have uniform distribution and with high
			
 
				+ *                       probability is not compressible
			
 
				+ */
			
 
				+#define BYTE_CORE_SET_LOW		(64)
			
 
				+#define BYTE_CORE_SET_HIGH		(200)
			
 
				+
			
 
				+static int byte_core_set_size(struct heuristic_ws *ws)
			
 
				+{
			
 
				+	u32 i;
			
 
				+	u32 coreset_sum = 0;
			
 
				+	const u32 core_set_threshold = ws->sample_size * 90 / 100;
			
 
				+	struct bucket_item *bucket = ws->bucket;
			
 
				+
			
 
				+	/* Sort in reverse order */
			
 
				+	sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
			
 
				+
			
 
				+	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
			
 
				+		coreset_sum += bucket[i].count;
			
 
				+
			
 
				+	if (coreset_sum > core_set_threshold)
			
 
				+		return i;
			
 
				+
			
 
				+	for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
			
 
				+		coreset_sum += bucket[i].count;
			
 
				+		if (coreset_sum > core_set_threshold)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	return i;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Count byte values in buckets.
			
 
				+ * This heuristic can detect textual data (configs, xml, json, html, etc).
			
 
				+ * Because in most text-like data byte set is restricted to limited number of
			
 
				+ * possible characters, and that restriction in most cases makes data easy to
			
 
				+ * compress.
			
 
				+ *
			
 
				+ * @BYTE_SET_THRESHOLD - consider all data within this byte set size:
			
 
				+ *	less - compressible
			
 
				+ *	more - need additional analysis
			
 
				+ */
			
 
				+#define BYTE_SET_THRESHOLD		(64)
			
 
				+
			
 
				+static u32 byte_set_size(const struct heuristic_ws *ws)
			
 
				+{
			
 
				+	u32 i;
			
 
				+	u32 byte_set_size = 0;
			
 
				+
			
 
				+	for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
			
 
				+		if (ws->bucket[i].count > 0)
			
 
				+			byte_set_size++;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Continue collecting count of byte values in buckets.  If the byte
			
 
				+	 * set size is bigger than the threshold, it's pointless to continue,
			
 
				+	 * the detection technique would fail for this type of data.
			
 
				+	 */
			
 
				+	for (; i < BUCKET_SIZE; i++) {
			
 
				+		if (ws->bucket[i].count > 0) {
			
 
				+			byte_set_size++;
			
 
				+			if (byte_set_size > BYTE_SET_THRESHOLD)
			
 
				+				return byte_set_size;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return byte_set_size;
			
 
				+}
			
 
				+
			
 
				+static bool sample_repeated_patterns(struct heuristic_ws *ws)
			
 
				+{
			
 
				+	const u32 half_of_sample = ws->sample_size / 2;
			
 
				+	const u8 *data = ws->sample;
			
 
				+
			
 
				+	return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
			
 
				+}
			
 
				+
			
 
				+static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
			
 
				+				     struct heuristic_ws *ws)
			
 
				+{
			
 
				+	struct page *page;
			
 
				+	u64 index, index_end;
			
 
				+	u32 i, curr_sample_pos;
			
 
				+	u8 *in_data;
			
 
				+
			
 
				+	/*
			
 
				+	 * Compression handles the input data by chunks of 128KiB
			
 
				+	 * (defined by BTRFS_MAX_UNCOMPRESSED)
			
 
				+	 *
			
 
				+	 * We do the same for the heuristic and loop over the whole range.
			
 
				+	 *
			
 
				+	 * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will
			
 
				+	 * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
			
 
				+	 */
			
 
				+	if (end - start > BTRFS_MAX_UNCOMPRESSED)
			
 
				+		end = start + BTRFS_MAX_UNCOMPRESSED;
			
 
				+
			
 
				+	index = start >> PAGE_SHIFT;
			
 
				+	index_end = end >> PAGE_SHIFT;
			
 
				+
			
 
				+	/* Don't miss unaligned end */
			
 
				+	if (!IS_ALIGNED(end, PAGE_SIZE))
			
 
				+		index_end++;
			
 
				+
			
 
				+	curr_sample_pos = 0;
			
 
				+	while (index < index_end) {
			
 
				+		page = find_get_page(inode->i_mapping, index);
			
 
				+		in_data = kmap(page);
			
 
				+		/* Handle case where the start is not aligned to PAGE_SIZE */
			
 
				+		i = start % PAGE_SIZE;
			
 
				+		while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
			
 
				+			/* Don't sample any garbage from the last page */
			
 
				+			if (start > end - SAMPLING_READ_SIZE)
			
 
				+				break;
			
 
				+			memcpy(&ws->sample[curr_sample_pos], &in_data[i],
			
 
				+					SAMPLING_READ_SIZE);
			
 
				+			i += SAMPLING_INTERVAL;
			
 
				+			start += SAMPLING_INTERVAL;
			
 
				+			curr_sample_pos += SAMPLING_READ_SIZE;
			
 
				+		}
			
 
				+		kunmap(page);
			
 
				+		put_page(page);
			
 
				+
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	ws->sample_size = curr_sample_pos;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Compression heuristic.
			
 
				  *
			
@@ -1082,18 +1446,87 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 
				  */
			
 
				 int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
			
 
				 {
			
 
				-	u64 index = start >> PAGE_SHIFT;
			
 
				-	u64 end_index = end >> PAGE_SHIFT;
			
 
				-	struct page *page;
			
 
				-	int ret = 1;
			
 
				+	struct list_head *ws_list = __find_workspace(0, true);
			
 
				+	struct heuristic_ws *ws;
			
 
				+	u32 i;
			
 
				+	u8 byte;
			
 
				+	int ret = 0;
			
 
				 
			
 
				-	while (index <= end_index) {
			
 
				-		page = find_get_page(inode->i_mapping, index);
			
 
				-		kmap(page);
			
 
				-		kunmap(page);
			
 
				-		put_page(page);
			
 
				-		index++;
			
 
				+	ws = list_entry(ws_list, struct heuristic_ws, list);
			
 
				+
			
 
				+	heuristic_collect_sample(inode, start, end, ws);
			
 
				+
			
 
				+	if (sample_repeated_patterns(ws)) {
			
 
				+		ret = 1;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
			
 
				+
			
 
				+	for (i = 0; i < ws->sample_size; i++) {
			
 
				+		byte = ws->sample[i];
			
 
				+		ws->bucket[byte].count++;
			
 
				+	}
			
 
				+
			
 
				+	i = byte_set_size(ws);
			
 
				+	if (i < BYTE_SET_THRESHOLD) {
			
 
				+		ret = 2;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	i = byte_core_set_size(ws);
			
 
				+	if (i <= BYTE_CORE_SET_LOW) {
			
 
				+		ret = 3;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				+	if (i >= BYTE_CORE_SET_HIGH) {
			
 
				+		ret = 0;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	i = shannon_entropy(ws);
			
 
				+	if (i <= ENTROPY_LVL_ACEPTABLE) {
			
 
				+		ret = 4;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * For the levels below ENTROPY_LVL_HIGH, additional analysis would be
			
 
				+	 * needed to give green light to compression.
			
 
				+	 *
			
 
				+	 * For now just assume that compression at that level is not worth the
			
 
				+	 * resources because:
			
 
				+	 *
			
 
				+	 * 1. it is possible to defrag the data later
			
 
				+	 *
			
 
				+	 * 2. the data would turn out to be hardly compressible, eg. 150 byte
			
 
				+	 * values, every bucket has counter at level ~54. The heuristic would
			
 
				+	 * be confused. This can happen when data have some internal repeated
			
 
				+	 * patterns like "abbacbbc...". This can be detected by analyzing
			
 
				+	 * pairs of bytes, which is too costly.
			
 
				+	 */
			
 
				+	if (i < ENTROPY_LVL_HIGH) {
			
 
				+		ret = 5;
			
 
				+		goto out;
			
 
				+	} else {
			
 
				+		ret = 0;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+out:
			
 
				+	__free_workspace(0, ws_list, true);
			
 
				 	return ret;
			
 
				 }
			
 
				+
			
 
				+unsigned int btrfs_compress_str2level(const char *str)
			
 
				+{
			
 
				+	if (strncmp(str, "zlib", 4) != 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
			
 
				+	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
			
 
				+		return str[5] - '0';
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -76,7 +76,7 @@ struct compressed_bio {
 
				 void btrfs_init_compress(void);
			
 
				 void btrfs_exit_compress(void);
			
 
				 
			
 
				-int btrfs_compress_pages(int type, struct address_space *mapping,
			
 
				+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
			
 
				 			 u64 start, struct page **pages,
			
 
				 			 unsigned long *out_pages,
			
 
				 			 unsigned long *total_in,
			
@@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
				 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
			
 
				 				 int mirror_num, unsigned long bio_flags);
			
 
				 
			
 
				+unsigned btrfs_compress_str2level(const char *str);
			
 
				+
			
 
				 enum btrfs_compression_type {
			
 
				 	BTRFS_COMPRESS_NONE  = 0,
			
 
				 	BTRFS_COMPRESS_ZLIB  = 1,
			
@@ -124,6 +126,8 @@ struct btrfs_compress_op {
 
				 			  struct page *dest_page,
			
 
				 			  unsigned long start_byte,
			
 
				 			  size_t srclen, size_t destlen);
			
 
				+
			
 
				+	void (*set_level)(struct list_head *ws, unsigned int type);
			
 
				 };
			
 
				 
			
 
				 extern const struct btrfs_compress_op btrfs_zlib_compress;
			
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 
				  * tree until you end up with a lock on the root.  A locked buffer
			
 
				  * is returned, with a reference held.
			
 
				  */
			
 
				-static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
			
 
				+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
			
 
				 {
			
 
				 	struct extent_buffer *eb;
			
 
				 
			
@@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
				 			goto out;
			
 
				 		} else if (left_end_reached) {
			
 
				 			if (right_level == 0) {
			
 
				-				ret = changed_cb(left_root, right_root,
			
 
				-						left_path, right_path,
			
 
				+				ret = changed_cb(left_path, right_path,
			
 
				 						&right_key,
			
 
				 						BTRFS_COMPARE_TREE_DELETED,
			
 
				 						ctx);
			
@@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
				 			continue;
			
 
				 		} else if (right_end_reached) {
			
 
				 			if (left_level == 0) {
			
 
				-				ret = changed_cb(left_root, right_root,
			
 
				-						left_path, right_path,
			
 
				+				ret = changed_cb(left_path, right_path,
			
 
				 						&left_key,
			
 
				 						BTRFS_COMPARE_TREE_NEW,
			
 
				 						ctx);
			
@@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
				 		if (left_level == 0 && right_level == 0) {
			
 
				 			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
			
 
				 			if (cmp < 0) {
			
 
				-				ret = changed_cb(left_root, right_root,
			
 
				-						left_path, right_path,
			
 
				+				ret = changed_cb(left_path, right_path,
			
 
				 						&left_key,
			
 
				 						BTRFS_COMPARE_TREE_NEW,
			
 
				 						ctx);
			
@@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
				 					goto out;
			
 
				 				advance_left = ADVANCE;
			
 
				 			} else if (cmp > 0) {
			
 
				-				ret = changed_cb(left_root, right_root,
			
 
				-						left_path, right_path,
			
 
				+				ret = changed_cb(left_path, right_path,
			
 
				 						&right_key,
			
 
				 						BTRFS_COMPARE_TREE_DELETED,
			
 
				 						ctx);
			
@@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
				 					result = BTRFS_COMPARE_TREE_CHANGED;
			
 
				 				else
			
 
				 					result = BTRFS_COMPARE_TREE_SAME;
			
 
				-				ret = changed_cb(left_root, right_root,
			
 
				-						 left_path, right_path,
			
 
				+				ret = changed_cb(left_path, right_path,
			
 
				 						 &left_key, result, ctx);
			
 
				 				if (ret < 0)
			
 
				 					goto out;
			
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -523,7 +523,7 @@ struct btrfs_caching_control {
 
				 };
			
 
				 
			
 
				 /* Once caching_thread() finds this much free space, it will wake up waiters. */
			
 
				-#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
			
 
				+#define CACHING_CTL_WAKE_UP SZ_2M
			
 
				 
			
 
				 struct btrfs_io_ctl {
			
 
				 	void *cur, *orig;
			
@@ -763,8 +763,6 @@ struct btrfs_fs_info {
 
				 	 * delayed dir index item
			
 
				 	 */
			
 
				 	struct btrfs_block_rsv global_block_rsv;
			
 
				-	/* block reservation for delay allocation */
			
 
				-	struct btrfs_block_rsv delalloc_block_rsv;
			
 
				 	/* block reservation for metadata operations */
			
 
				 	struct btrfs_block_rsv trans_block_rsv;
			
 
				 	/* block reservation for chunk tree */
			
@@ -790,6 +788,7 @@ struct btrfs_fs_info {
 
				 	 */
			
 
				 	unsigned long pending_changes;
			
 
				 	unsigned long compress_type:4;
			
 
				+	unsigned int compress_level;
			
 
				 	int commit_interval;
			
 
				 	/*
			
 
				 	 * It is a suggestive number, the read side is safe even it gets a
			
@@ -878,9 +877,6 @@ struct btrfs_fs_info {
 
				 	rwlock_t tree_mod_log_lock;
			
 
				 	struct rb_root tree_mod_log;
			
 
				 
			
 
				-	atomic_t nr_async_submits;
			
 
				-	atomic_t async_submit_draining;
			
 
				-	atomic_t nr_async_bios;
			
 
				 	atomic_t async_delalloc_pages;
			
 
				 	atomic_t open_ioctl_trans;
			
 
				 
			
@@ -1100,6 +1096,11 @@ struct btrfs_fs_info {
 
				 	u32 nodesize;
			
 
				 	u32 sectorsize;
			
 
				 	u32 stripesize;
			
 
				+
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+	spinlock_t ref_verify_lock;
			
 
				+	struct rb_root block_tree;
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
			
@@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 
				 #define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)
			
 
				 #define BTRFS_MOUNT_FREE_SPACE_TREE	(1 << 26)
			
 
				 #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
			
 
				+#define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
			
 
				 
			
 
				 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
			
 
				 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
			
@@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
				 			   struct extent_buffer *buf,
			
 
				 			   u64 parent, int last_ref);
			
 
				 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
			
 
				-				     u64 root_objectid, u64 owner,
			
 
				+				     struct btrfs_root *root, u64 owner,
			
 
				 				     u64 offset, u64 ram_bytes,
			
 
				 				     struct btrfs_key *ins);
			
 
				 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
			
@@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 
				 				u64 bytenr, u64 num_bytes, u64 flags,
			
 
				 				int level, int is_data);
			
 
				 int btrfs_free_extent(struct btrfs_trans_handle *trans,
			
 
				-		      struct btrfs_fs_info *fs_info,
			
 
				+		      struct btrfs_root *root,
			
 
				 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
			
 
				 		      u64 owner, u64 offset);
			
 
				 
			
@@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 
				 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			
 
				 			       struct btrfs_fs_info *fs_info);
			
 
				 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			
 
				-			 struct btrfs_fs_info *fs_info,
			
 
				+			 struct btrfs_root *root,
			
 
				 			 u64 bytenr, u64 num_bytes, u64 parent,
			
 
				 			 u64 root_objectid, u64 owner, u64 offset);
			
 
				 
			
@@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 
				 				     u64 *qgroup_reserved, bool use_global_rsv);
			
 
				 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
			
 
				 				      struct btrfs_block_rsv *rsv);
			
 
				+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
			
 
				+
			
 
				 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
			
 
				 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
			
 
				 int btrfs_delalloc_reserve_space(struct inode *inode,
			
@@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 
				 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
			
 
				 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				 					      unsigned short type);
			
 
				+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				+				   struct btrfs_block_rsv *rsv,
			
 
				+				   unsigned short type);
			
 
				 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				 			  struct btrfs_block_rsv *rsv);
			
 
				 void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
			
@@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
 
				 			     const struct btrfs_key *new_key);
			
 
				 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
			
 
				 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
			
 
				+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
			
 
				 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			
 
				 			struct btrfs_key *key, int lowest_level,
			
 
				 			u64 min_trans);
			
@@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result {
 
				 	BTRFS_COMPARE_TREE_CHANGED,
			
 
				 	BTRFS_COMPARE_TREE_SAME,
			
 
				 };
			
 
				-typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
			
 
				-				  struct btrfs_root *right_root,
			
 
				-				  struct btrfs_path *left_path,
			
 
				+typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
			
 
				 				  struct btrfs_path *right_path,
			
 
				 				  struct btrfs_key *key,
			
 
				 				  enum btrfs_compare_tree_result result,
			
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -581,36 +581,12 @@ static int btrfs_delayed_inode_reserve_metadata(
 
				 	struct btrfs_block_rsv *dst_rsv;
			
 
				 	u64 num_bytes;
			
 
				 	int ret;
			
 
				-	bool release = false;
			
 
				 
			
 
				 	src_rsv = trans->block_rsv;
			
 
				 	dst_rsv = &fs_info->delayed_block_rsv;
			
 
				 
			
 
				 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
			
 
				 
			
 
				-	/*
			
 
				-	 * If our block_rsv is the delalloc block reserve then check and see if
			
 
				-	 * we have our extra reservation for updating the inode.  If not fall
			
 
				-	 * through and try to reserve space quickly.
			
 
				-	 *
			
 
				-	 * We used to try and steal from the delalloc block rsv or the global
			
 
				-	 * reserve, but we'd steal a full reservation, which isn't kind.  We are
			
 
				-	 * here through delalloc which means we've likely just cowed down close
			
 
				-	 * to the leaf that contains the inode, so we would steal less just
			
 
				-	 * doing the fallback inode update, so if we do end up having to steal
			
 
				-	 * from the global block rsv we hopefully only steal one or two blocks
			
 
				-	 * worth which is less likely to hurt us.
			
 
				-	 */
			
 
				-	if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
			
 
				-		spin_lock(&inode->lock);
			
 
				-		if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			
 
				-				       &inode->runtime_flags))
			
 
				-			release = true;
			
 
				-		else
			
 
				-			src_rsv = NULL;
			
 
				-		spin_unlock(&inode->lock);
			
 
				-	}
			
 
				-
			
 
				 	/*
			
 
				 	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
			
 
				 	 * which doesn't reserve space for speed.  This is a problem since we
			
@@ -618,7 +594,7 @@ static int btrfs_delayed_inode_reserve_metadata(
 
				 	 * space.
			
 
				 	 *
			
 
				 	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
			
 
				-	 * we're accounted for.
			
 
				+	 * we always reserve enough to update the inode item.
			
 
				 	 */
			
 
				 	if (!src_rsv || (!trans->bytes_reserved &&
			
 
				 			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
			
@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
 
				 	}
			
 
				 
			
 
				 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
			
 
				-
			
 
				-	/*
			
 
				-	 * Migrate only takes a reservation, it doesn't touch the size of the
			
 
				-	 * block_rsv.  This is to simplify people who don't normally have things
			
 
				-	 * migrated from their block rsv.  If they go to release their
			
 
				-	 * reservation, that will decrease the size as well, so if migrate
			
 
				-	 * reduced size we'd end up with a negative size.  But for the
			
 
				-	 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
			
 
				-	 * but we could in fact do this reserve/migrate dance several times
			
 
				-	 * between the time we did the original reservation and we'd clean it
			
 
				-	 * up.  So to take care of this, release the space for the meta
			
 
				-	 * reservation here.  I think it may be time for a documentation page on
			
 
				-	 * how block rsvs. work.
			
 
				-	 */
			
 
				 	if (!ret) {
			
 
				 		trace_btrfs_space_reservation(fs_info, "delayed_inode",
			
 
				 					      btrfs_ino(inode), num_bytes, 1);
			
 
				 		node->bytes_reserved = num_bytes;
			
 
				 	}
			
 
				 
			
 
				-	if (release) {
			
 
				-		trace_btrfs_space_reservation(fs_info, "delalloc",
			
 
				-					      btrfs_ino(inode), num_bytes, 0);
			
 
				-		btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
			
 
				-	}
			
 
				-
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
 
				 /*
			
 
				  * compare two delayed tree backrefs with same bytenr and type
			
 
				  */
			
 
				-static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			
 
				-			  struct btrfs_delayed_tree_ref *ref1, int type)
			
 
				+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			
 
				+			  struct btrfs_delayed_tree_ref *ref2)
			
 
				 {
			
 
				-	if (type == BTRFS_TREE_BLOCK_REF_KEY) {
			
 
				+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
			
 
				 		if (ref1->root < ref2->root)
			
 
				 			return -1;
			
 
				 		if (ref1->root > ref2->root)
			
@@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
 
				 /*
			
 
				  * compare two delayed data backrefs with same bytenr and type
			
 
				  */
			
 
				-static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
			
 
				-			  struct btrfs_delayed_data_ref *ref1)
			
 
				+static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			
 
				+			  struct btrfs_delayed_data_ref *ref2)
			
 
				 {
			
 
				 	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
			
 
				 		if (ref1->root < ref2->root)
			
@@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int comp_refs(struct btrfs_delayed_ref_node *ref1,
			
 
				+		     struct btrfs_delayed_ref_node *ref2,
			
 
				+		     bool check_seq)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (ref1->type < ref2->type)
			
 
				+		return -1;
			
 
				+	if (ref1->type > ref2->type)
			
 
				+		return 1;
			
 
				+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
			
 
				+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
			
 
				+		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
			
 
				+				     btrfs_delayed_node_to_tree_ref(ref2));
			
 
				+	else
			
 
				+		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
			
 
				+				     btrfs_delayed_node_to_data_ref(ref2));
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+	if (check_seq) {
			
 
				+		if (ref1->seq < ref2->seq)
			
 
				+			return -1;
			
 
				+		if (ref1->seq > ref2->seq)
			
 
				+			return 1;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 /* insert a new ref to head ref rbtree */
			
 
				 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
			
 
				 						   struct rb_node *node)
			
@@ -96,15 +124,43 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
 
				 	u64 bytenr;
			
 
				 
			
 
				 	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
			
 
				-	bytenr = ins->node.bytenr;
			
 
				+	bytenr = ins->bytenr;
			
 
				 	while (*p) {
			
 
				 		parent_node = *p;
			
 
				 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
			
 
				 				 href_node);
			
 
				 
			
 
				-		if (bytenr < entry->node.bytenr)
			
 
				+		if (bytenr < entry->bytenr)
			
 
				+			p = &(*p)->rb_left;
			
 
				+		else if (bytenr > entry->bytenr)
			
 
				+			p = &(*p)->rb_right;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+
			
 
				+	rb_link_node(node, parent_node, p);
			
 
				+	rb_insert_color(node, root);
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
			
 
				+		struct btrfs_delayed_ref_node *ins)
			
 
				+{
			
 
				+	struct rb_node **p = &root->rb_node;
			
 
				+	struct rb_node *node = &ins->ref_node;
			
 
				+	struct rb_node *parent_node = NULL;
			
 
				+	struct btrfs_delayed_ref_node *entry;
			
 
				+
			
 
				+	while (*p) {
			
 
				+		int comp;
			
 
				+
			
 
				+		parent_node = *p;
			
 
				+		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
			
 
				+				 ref_node);
			
 
				+		comp = comp_refs(ins, entry, true);
			
 
				+		if (comp < 0)
			
 
				 			p = &(*p)->rb_left;
			
 
				-		else if (bytenr > entry->node.bytenr)
			
 
				+		else if (comp > 0)
			
 
				 			p = &(*p)->rb_right;
			
 
				 		else
			
 
				 			return entry;
			
@@ -133,15 +189,15 @@ find_ref_head(struct rb_root *root, u64 bytenr,
 
				 	while (n) {
			
 
				 		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
			
 
				 
			
 
				-		if (bytenr < entry->node.bytenr)
			
 
				+		if (bytenr < entry->bytenr)
			
 
				 			n = n->rb_left;
			
 
				-		else if (bytenr > entry->node.bytenr)
			
 
				+		else if (bytenr > entry->bytenr)
			
 
				 			n = n->rb_right;
			
 
				 		else
			
 
				 			return entry;
			
 
				 	}
			
 
				 	if (entry && return_bigger) {
			
 
				-		if (bytenr > entry->node.bytenr) {
			
 
				+		if (bytenr > entry->bytenr) {
			
 
				 			n = rb_next(&entry->href_node);
			
 
				 			if (!n)
			
 
				 				n = rb_first(root);
			
@@ -164,17 +220,17 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 
				 	if (mutex_trylock(&head->mutex))
			
 
				 		return 0;
			
 
				 
			
 
				-	refcount_inc(&head->node.refs);
			
 
				+	refcount_inc(&head->refs);
			
 
				 	spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 	mutex_lock(&head->mutex);
			
 
				 	spin_lock(&delayed_refs->lock);
			
 
				-	if (!head->node.in_tree) {
			
 
				+	if (RB_EMPTY_NODE(&head->href_node)) {
			
 
				 		mutex_unlock(&head->mutex);
			
 
				-		btrfs_put_delayed_ref(&head->node);
			
 
				+		btrfs_put_delayed_ref_head(head);
			
 
				 		return -EAGAIN;
			
 
				 	}
			
 
				-	btrfs_put_delayed_ref(&head->node);
			
 
				+	btrfs_put_delayed_ref_head(head);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -183,15 +239,11 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 
				 				    struct btrfs_delayed_ref_head *head,
			
 
				 				    struct btrfs_delayed_ref_node *ref)
			
 
				 {
			
 
				-	if (btrfs_delayed_ref_is_head(ref)) {
			
 
				-		head = btrfs_delayed_node_to_head(ref);
			
 
				-		rb_erase(&head->href_node, &delayed_refs->href_root);
			
 
				-	} else {
			
 
				-		assert_spin_locked(&head->lock);
			
 
				-		list_del(&ref->list);
			
 
				-		if (!list_empty(&ref->add_list))
			
 
				-			list_del(&ref->add_list);
			
 
				-	}
			
 
				+	assert_spin_locked(&head->lock);
			
 
				+	rb_erase(&ref->ref_node, &head->ref_tree);
			
 
				+	RB_CLEAR_NODE(&ref->ref_node);
			
 
				+	if (!list_empty(&ref->add_list))
			
 
				+		list_del(&ref->add_list);
			
 
				 	ref->in_tree = 0;
			
 
				 	btrfs_put_delayed_ref(ref);
			
 
				 	atomic_dec(&delayed_refs->num_entries);
			
@@ -206,36 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
 
				 		      u64 seq)
			
 
				 {
			
 
				 	struct btrfs_delayed_ref_node *next;
			
 
				+	struct rb_node *node = rb_next(&ref->ref_node);
			
 
				 	bool done = false;
			
 
				 
			
 
				-	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
			
 
				-				list);
			
 
				-	while (!done && &next->list != &head->ref_list) {
			
 
				+	while (!done && node) {
			
 
				 		int mod;
			
 
				-		struct btrfs_delayed_ref_node *next2;
			
 
				-
			
 
				-		next2 = list_next_entry(next, list);
			
 
				-
			
 
				-		if (next == ref)
			
 
				-			goto next;
			
 
				 
			
 
				+		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
			
 
				+		node = rb_next(node);
			
 
				 		if (seq && next->seq >= seq)
			
 
				-			goto next;
			
 
				-
			
 
				-		if (next->type != ref->type)
			
 
				-			goto next;
			
 
				-
			
 
				-		if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
			
 
				-		     ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
			
 
				-		    comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
			
 
				-				   btrfs_delayed_node_to_tree_ref(next),
			
 
				-				   ref->type))
			
 
				-			goto next;
			
 
				-		if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
			
 
				-		     ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
			
 
				-		    comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
			
 
				-				   btrfs_delayed_node_to_data_ref(next)))
			
 
				-			goto next;
			
 
				+			break;
			
 
				+		if (comp_refs(ref, next, false))
			
 
				+			break;
			
 
				 
			
 
				 		if (ref->action == next->action) {
			
 
				 			mod = next->ref_mod;
			
@@ -259,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
 
				 			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
			
 
				 				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
			
 
				 		}
			
 
				-next:
			
 
				-		next = next2;
			
 
				 	}
			
 
				 
			
 
				 	return done;
			
@@ -272,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 
				 			      struct btrfs_delayed_ref_head *head)
			
 
				 {
			
 
				 	struct btrfs_delayed_ref_node *ref;
			
 
				+	struct rb_node *node;
			
 
				 	u64 seq = 0;
			
 
				 
			
 
				 	assert_spin_locked(&head->lock);
			
 
				 
			
 
				-	if (list_empty(&head->ref_list))
			
 
				+	if (RB_EMPTY_ROOT(&head->ref_tree))
			
 
				 		return;
			
 
				 
			
 
				 	/* We don't have too many refs to merge for data. */
			
@@ -293,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 
				 	}
			
 
				 	spin_unlock(&fs_info->tree_mod_seq_lock);
			
 
				 
			
 
				-	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
			
 
				-			       list);
			
 
				-	while (&ref->list != &head->ref_list) {
			
 
				+again:
			
 
				+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
			
 
				+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
			
 
				 		if (seq && ref->seq >= seq)
			
 
				-			goto next;
			
 
				-
			
 
				-		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
			
 
				-			if (list_empty(&head->ref_list))
			
 
				-				break;
			
 
				-			ref = list_first_entry(&head->ref_list,
			
 
				-					       struct btrfs_delayed_ref_node,
			
 
				-					       list);
			
 
				 			continue;
			
 
				-		}
			
 
				-next:
			
 
				-		ref = list_next_entry(ref, list);
			
 
				+		if (merge_ref(trans, delayed_refs, head, ref, seq))
			
 
				+			goto again;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -380,8 +404,8 @@ again:
 
				 	head->processing = 1;
			
 
				 	WARN_ON(delayed_refs->num_heads_ready == 0);
			
 
				 	delayed_refs->num_heads_ready--;
			
 
				-	delayed_refs->run_delayed_start = head->node.bytenr +
			
 
				-		head->node.num_bytes;
			
 
				+	delayed_refs->run_delayed_start = head->bytenr +
			
 
				+		head->num_bytes;
			
 
				 	return head;
			
 
				 }
			
 
				 
			
@@ -391,37 +415,19 @@ again:
 
				  * Return 0 for insert.
			
 
				  * Return >0 for merge.
			
 
				  */
			
 
				-static int
			
 
				-add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
			
 
				-			   struct btrfs_delayed_ref_root *root,
			
 
				-			   struct btrfs_delayed_ref_head *href,
			
 
				-			   struct btrfs_delayed_ref_node *ref)
			
 
				+static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			
 
				+			      struct btrfs_delayed_ref_root *root,
			
 
				+			      struct btrfs_delayed_ref_head *href,
			
 
				+			      struct btrfs_delayed_ref_node *ref)
			
 
				 {
			
 
				 	struct btrfs_delayed_ref_node *exist;
			
 
				 	int mod;
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	spin_lock(&href->lock);
			
 
				-	/* Check whether we can merge the tail node with ref */
			
 
				-	if (list_empty(&href->ref_list))
			
 
				-		goto add_tail;
			
 
				-	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
			
 
				-			   list);
			
 
				-	/* No need to compare bytenr nor is_head */
			
 
				-	if (exist->type != ref->type || exist->seq != ref->seq)
			
 
				-		goto add_tail;
			
 
				-
			
 
				-	if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
			
 
				-	     exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
			
 
				-	    comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
			
 
				-			   btrfs_delayed_node_to_tree_ref(ref),
			
 
				-			   ref->type))
			
 
				-		goto add_tail;
			
 
				-	if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
			
 
				-	     exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
			
 
				-	    comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
			
 
				-			   btrfs_delayed_node_to_data_ref(ref)))
			
 
				-		goto add_tail;
			
 
				+	exist = tree_insert(&href->ref_tree, ref);
			
 
				+	if (!exist)
			
 
				+		goto inserted;
			
 
				 
			
 
				 	/* Now we are sure we can merge */
			
 
				 	ret = 1;
			
@@ -452,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
 
				 		drop_delayed_ref(trans, root, href, exist);
			
 
				 	spin_unlock(&href->lock);
			
 
				 	return ret;
			
 
				-
			
 
				-add_tail:
			
 
				-	list_add_tail(&ref->list, &href->ref_list);
			
 
				+inserted:
			
 
				 	if (ref->action == BTRFS_ADD_DELAYED_REF)
			
 
				 		list_add_tail(&ref->add_list, &href->ref_add_list);
			
 
				 	atomic_inc(&root->num_entries);
			
@@ -469,20 +473,16 @@ add_tail:
 
				  */
			
 
				 static noinline void
			
 
				 update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
			
 
				-			 struct btrfs_delayed_ref_node *existing,
			
 
				-			 struct btrfs_delayed_ref_node *update,
			
 
				+			 struct btrfs_delayed_ref_head *existing,
			
 
				+			 struct btrfs_delayed_ref_head *update,
			
 
				 			 int *old_ref_mod_ret)
			
 
				 {
			
 
				-	struct btrfs_delayed_ref_head *existing_ref;
			
 
				-	struct btrfs_delayed_ref_head *ref;
			
 
				 	int old_ref_mod;
			
 
				 
			
 
				-	existing_ref = btrfs_delayed_node_to_head(existing);
			
 
				-	ref = btrfs_delayed_node_to_head(update);
			
 
				-	BUG_ON(existing_ref->is_data != ref->is_data);
			
 
				+	BUG_ON(existing->is_data != update->is_data);
			
 
				 
			
 
				-	spin_lock(&existing_ref->lock);
			
 
				-	if (ref->must_insert_reserved) {
			
 
				+	spin_lock(&existing->lock);
			
 
				+	if (update->must_insert_reserved) {
			
 
				 		/* if the extent was freed and then
			
 
				 		 * reallocated before the delayed ref
			
 
				 		 * entries were processed, we can end up
			
@@ -490,7 +490,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 
				 		 * the must_insert_reserved flag set.
			
 
				 		 * Set it again here
			
 
				 		 */
			
 
				-		existing_ref->must_insert_reserved = ref->must_insert_reserved;
			
 
				+		existing->must_insert_reserved = update->must_insert_reserved;
			
 
				 
			
 
				 		/*
			
 
				 		 * update the num_bytes so we make sure the accounting
			
@@ -500,22 +500,22 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 
				 
			
 
				 	}
			
 
				 
			
 
				-	if (ref->extent_op) {
			
 
				-		if (!existing_ref->extent_op) {
			
 
				-			existing_ref->extent_op = ref->extent_op;
			
 
				+	if (update->extent_op) {
			
 
				+		if (!existing->extent_op) {
			
 
				+			existing->extent_op = update->extent_op;
			
 
				 		} else {
			
 
				-			if (ref->extent_op->update_key) {
			
 
				-				memcpy(&existing_ref->extent_op->key,
			
 
				-				       &ref->extent_op->key,
			
 
				-				       sizeof(ref->extent_op->key));
			
 
				-				existing_ref->extent_op->update_key = true;
			
 
				+			if (update->extent_op->update_key) {
			
 
				+				memcpy(&existing->extent_op->key,
			
 
				+				       &update->extent_op->key,
			
 
				+				       sizeof(update->extent_op->key));
			
 
				+				existing->extent_op->update_key = true;
			
 
				 			}
			
 
				-			if (ref->extent_op->update_flags) {
			
 
				-				existing_ref->extent_op->flags_to_set |=
			
 
				-					ref->extent_op->flags_to_set;
			
 
				-				existing_ref->extent_op->update_flags = true;
			
 
				+			if (update->extent_op->update_flags) {
			
 
				+				existing->extent_op->flags_to_set |=
			
 
				+					update->extent_op->flags_to_set;
			
 
				+				existing->extent_op->update_flags = true;
			
 
				 			}
			
 
				-			btrfs_free_delayed_extent_op(ref->extent_op);
			
 
				+			btrfs_free_delayed_extent_op(update->extent_op);
			
 
				 		}
			
 
				 	}
			
 
				 	/*
			
@@ -523,23 +523,23 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 
				 	 * only need the lock for this case cause we could be processing it
			
 
				 	 * currently, for refs we just added we know we're a-ok.
			
 
				 	 */
			
 
				-	old_ref_mod = existing_ref->total_ref_mod;
			
 
				+	old_ref_mod = existing->total_ref_mod;
			
 
				 	if (old_ref_mod_ret)
			
 
				 		*old_ref_mod_ret = old_ref_mod;
			
 
				 	existing->ref_mod += update->ref_mod;
			
 
				-	existing_ref->total_ref_mod += update->ref_mod;
			
 
				+	existing->total_ref_mod += update->ref_mod;
			
 
				 
			
 
				 	/*
			
 
				 	 * If we are going to from a positive ref mod to a negative or vice
			
 
				 	 * versa we need to make sure to adjust pending_csums accordingly.
			
 
				 	 */
			
 
				-	if (existing_ref->is_data) {
			
 
				-		if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
			
 
				+	if (existing->is_data) {
			
 
				+		if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
			
 
				 			delayed_refs->pending_csums -= existing->num_bytes;
			
 
				-		if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
			
 
				+		if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
			
 
				 			delayed_refs->pending_csums += existing->num_bytes;
			
 
				 	}
			
 
				-	spin_unlock(&existing_ref->lock);
			
 
				+	spin_unlock(&existing->lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -550,14 +550,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 
				 static noinline struct btrfs_delayed_ref_head *
			
 
				 add_delayed_ref_head(struct btrfs_fs_info *fs_info,
			
 
				 		     struct btrfs_trans_handle *trans,
			
 
				-		     struct btrfs_delayed_ref_node *ref,
			
 
				+		     struct btrfs_delayed_ref_head *head_ref,
			
 
				 		     struct btrfs_qgroup_extent_record *qrecord,
			
 
				 		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
			
 
				 		     int action, int is_data, int *qrecord_inserted_ret,
			
 
				 		     int *old_ref_mod, int *new_ref_mod)
			
 
				 {
			
 
				 	struct btrfs_delayed_ref_head *existing;
			
 
				-	struct btrfs_delayed_ref_head *head_ref = NULL;
			
 
				 	struct btrfs_delayed_ref_root *delayed_refs;
			
 
				 	int count_mod = 1;
			
 
				 	int must_insert_reserved = 0;
			
@@ -593,26 +592,21 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 
				 
			
 
				 	delayed_refs = &trans->transaction->delayed_refs;
			
 
				 
			
 
				-	/* first set the basic ref node struct up */
			
 
				-	refcount_set(&ref->refs, 1);
			
 
				-	ref->bytenr = bytenr;
			
 
				-	ref->num_bytes = num_bytes;
			
 
				-	ref->ref_mod = count_mod;
			
 
				-	ref->type  = 0;
			
 
				-	ref->action  = 0;
			
 
				-	ref->is_head = 1;
			
 
				-	ref->in_tree = 1;
			
 
				-	ref->seq = 0;
			
 
				-
			
 
				-	head_ref = btrfs_delayed_node_to_head(ref);
			
 
				+	refcount_set(&head_ref->refs, 1);
			
 
				+	head_ref->bytenr = bytenr;
			
 
				+	head_ref->num_bytes = num_bytes;
			
 
				+	head_ref->ref_mod = count_mod;
			
 
				 	head_ref->must_insert_reserved = must_insert_reserved;
			
 
				 	head_ref->is_data = is_data;
			
 
				-	INIT_LIST_HEAD(&head_ref->ref_list);
			
 
				+	head_ref->ref_tree = RB_ROOT;
			
 
				 	INIT_LIST_HEAD(&head_ref->ref_add_list);
			
 
				+	RB_CLEAR_NODE(&head_ref->href_node);
			
 
				 	head_ref->processing = 0;
			
 
				 	head_ref->total_ref_mod = count_mod;
			
 
				 	head_ref->qgroup_reserved = 0;
			
 
				 	head_ref->qgroup_ref_root = 0;
			
 
				+	spin_lock_init(&head_ref->lock);
			
 
				+	mutex_init(&head_ref->mutex);
			
 
				 
			
 
				 	/* Record qgroup extent info if provided */
			
 
				 	if (qrecord) {
			
@@ -632,17 +626,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 
				 			qrecord_inserted = 1;
			
 
				 	}
			
 
				 
			
 
				-	spin_lock_init(&head_ref->lock);
			
 
				-	mutex_init(&head_ref->mutex);
			
 
				-
			
 
				-	trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
			
 
				+	trace_add_delayed_ref_head(fs_info, head_ref, action);
			
 
				 
			
 
				 	existing = htree_insert(&delayed_refs->href_root,
			
 
				 				&head_ref->href_node);
			
 
				 	if (existing) {
			
 
				 		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
			
 
				 			&& existing->qgroup_reserved);
			
 
				-		update_existing_head_ref(delayed_refs, &existing->node, ref,
			
 
				+		update_existing_head_ref(delayed_refs, existing, head_ref,
			
 
				 					 old_ref_mod);
			
 
				 		/*
			
 
				 		 * we've updated the existing ref, free the newly
			
@@ -699,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
				 	ref->is_head = 0;
			
 
				 	ref->in_tree = 1;
			
 
				 	ref->seq = seq;
			
 
				-	INIT_LIST_HEAD(&ref->list);
			
 
				+	RB_CLEAR_NODE(&ref->ref_node);
			
 
				 	INIT_LIST_HEAD(&ref->add_list);
			
 
				 
			
 
				 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
			
@@ -713,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
				 
			
 
				 	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
			
 
				 
			
 
				-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
			
 
				+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
			
 
				 
			
 
				 	/*
			
 
				 	 * XXX: memory should be freed at the same level allocated.
			
@@ -756,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
				 	ref->is_head = 0;
			
 
				 	ref->in_tree = 1;
			
 
				 	ref->seq = seq;
			
 
				-	INIT_LIST_HEAD(&ref->list);
			
 
				+	RB_CLEAR_NODE(&ref->ref_node);
			
 
				 	INIT_LIST_HEAD(&ref->add_list);
			
 
				 
			
 
				 	full_ref = btrfs_delayed_node_to_data_ref(ref);
			
@@ -772,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
				 
			
 
				 	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
			
 
				 
			
 
				-	ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
			
 
				-
			
 
				+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
			
 
				 	if (ret > 0)
			
 
				 		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
			
 
				 }
			
@@ -821,7 +811,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
				 	 * insert both the head node and the new ref without dropping
			
 
				 	 * the spin lock
			
 
				 	 */
			
 
				-	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
			
 
				+	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
			
 
				 					bytenr, num_bytes, 0, 0, action, 0,
			
 
				 					&qrecord_inserted, old_ref_mod,
			
 
				 					new_ref_mod);
			
@@ -888,7 +878,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
				 	 * insert both the head node and the new ref without dropping
			
 
				 	 * the spin lock
			
 
				 	 */
			
 
				-	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
			
 
				+	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
			
 
				 					bytenr, num_bytes, ref_root, reserved,
			
 
				 					action, 1, &qrecord_inserted,
			
 
				 					old_ref_mod, new_ref_mod);
			
@@ -920,7 +910,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 
				 	delayed_refs = &trans->transaction->delayed_refs;
			
 
				 	spin_lock(&delayed_refs->lock);
			
 
				 
			
 
				-	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
			
 
				+	add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
			
 
				 			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
			
 
				 			     extent_op->is_data, NULL, NULL, NULL);
			
 
				 
			
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -26,18 +26,8 @@
 
				 #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
			
 
				 #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
			
 
				 
			
 
				-/*
			
 
				- * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
			
 
				- * same ref_node structure.
			
 
				- * Ref_head is in a higher logic level than tree/data ref, and duplicated
			
 
				- * bytenr/num_bytes in ref_node is really a waste or memory, they should be
			
 
				- * referred from ref_head.
			
 
				- * This gets more disgusting after we use list to store tree/data ref in
			
 
				- * ref_head. Must clean this mess up later.
			
 
				- */
			
 
				 struct btrfs_delayed_ref_node {
			
 
				-	/*data/tree ref use list, stored in ref_head->ref_list. */
			
 
				-	struct list_head list;
			
 
				+	struct rb_node ref_node;
			
 
				 	/*
			
 
				 	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
			
 
				 	 * ref_head->ref_add_list, then we do not need to iterate the
			
@@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op {
 
				  * reference count modifications we've queued up.
			
 
				  */
			
 
				 struct btrfs_delayed_ref_head {
			
 
				-	struct btrfs_delayed_ref_node node;
			
 
				-
			
 
				+	u64 bytenr;
			
 
				+	u64 num_bytes;
			
 
				+	refcount_t refs;
			
 
				 	/*
			
 
				 	 * the mutex is held while running the refs, and it is also
			
 
				 	 * held when checking the sum of reference modifications.
			
@@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head {
 
				 	struct mutex mutex;
			
 
				 
			
 
				 	spinlock_t lock;
			
 
				-	struct list_head ref_list;
			
 
				+	struct rb_root ref_tree;
			
 
				 	/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
			
 
				 	struct list_head ref_add_list;
			
 
				 
			
@@ -115,6 +106,14 @@ struct btrfs_delayed_ref_head {
 
				 	 */
			
 
				 	int total_ref_mod;
			
 
				 
			
 
				+	/*
			
 
				+	 * This is the current outstanding mod references for this bytenr.  This
			
 
				+	 * is used with lookup_extent_info to get an accurate reference count
			
 
				+	 * for a bytenr, so it is adjusted as delayed refs are run so that any
			
 
				+	 * on disk reference count + ref_mod is accurate.
			
 
				+	 */
			
 
				+	int ref_mod;
			
 
				+
			
 
				 	/*
			
 
				 	 * For qgroup reserved space freeing.
			
 
				 	 *
			
@@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
 
				 		case BTRFS_SHARED_DATA_REF_KEY:
			
 
				 			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			
 
				 			break;
			
 
				-		case 0:
			
 
				-			kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
			
 
				-			break;
			
 
				 		default:
			
 
				 			BUG();
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
			
 
				+{
			
 
				+	if (refcount_dec_and_test(&head->refs))
			
 
				+		kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
			
 
				+}
			
 
				+
			
 
				 int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
			
 
				 			       struct btrfs_trans_handle *trans,
			
 
				 			       u64 bytenr, u64 num_bytes, u64 parent,
			
@@ -282,36 +284,18 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 
				 			    struct btrfs_delayed_ref_root *delayed_refs,
			
 
				 			    u64 seq);
			
 
				 
			
 
				-/*
			
 
				- * a node might live in a head or a regular ref, this lets you
			
 
				- * test for the proper type to use.
			
 
				- */
			
 
				-static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
			
 
				-{
			
 
				-	return node->is_head;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * helper functions to cast a node into its container
			
 
				  */
			
 
				 static inline struct btrfs_delayed_tree_ref *
			
 
				 btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
			
 
				 {
			
 
				-	WARN_ON(btrfs_delayed_ref_is_head(node));
			
 
				 	return container_of(node, struct btrfs_delayed_tree_ref, node);
			
 
				 }
			
 
				 
			
 
				 static inline struct btrfs_delayed_data_ref *
			
 
				 btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
			
 
				 {
			
 
				-	WARN_ON(btrfs_delayed_ref_is_head(node));
			
 
				 	return container_of(node, struct btrfs_delayed_data_ref, node);
			
 
				 }
			
 
				-
			
 
				-static inline struct btrfs_delayed_ref_head *
			
 
				-btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
			
 
				-{
			
 
				-	WARN_ON(!btrfs_delayed_ref_is_head(node));
			
 
				-	return container_of(node, struct btrfs_delayed_ref_head, node);
			
 
				-}
			
 
				 #endif
			
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,8 @@
 
				 #include "sysfs.h"
			
 
				 #include "qgroup.h"
			
 
				 #include "compression.h"
			
 
				+#include "tree-checker.h"
			
 
				+#include "ref-verify.h"
			
 
				 
			
 
				 #ifdef CONFIG_X86
			
 
				 #include <asm/cpufeature.h>
			
@@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-#define CORRUPT(reason, eb, root, slot)					\
			
 
				-	btrfs_crit(root->fs_info,					\
			
 
				-		   "corrupt %s, %s: block=%llu, root=%llu, slot=%d",	\
			
 
				-		   btrfs_header_level(eb) == 0 ? "leaf" : "node",	\
			
 
				-		   reason, btrfs_header_bytenr(eb), root->objectid, slot)
			
 
				-
			
 
				-static noinline int check_leaf(struct btrfs_root *root,
			
 
				-			       struct extent_buffer *leaf)
			
 
				-{
			
 
				-	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				-	struct btrfs_key key;
			
 
				-	struct btrfs_key leaf_key;
			
 
				-	u32 nritems = btrfs_header_nritems(leaf);
			
 
				-	int slot;
			
 
				-
			
 
				-	/*
			
 
				-	 * Extent buffers from a relocation tree have a owner field that
			
 
				-	 * corresponds to the subvolume tree they are based on. So just from an
			
 
				-	 * extent buffer alone we can not find out what is the id of the
			
 
				-	 * corresponding subvolume tree, so we can not figure out if the extent
			
 
				-	 * buffer corresponds to the root of the relocation tree or not. So skip
			
 
				-	 * this check for relocation trees.
			
 
				-	 */
			
 
				-	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
			
 
				-		struct btrfs_root *check_root;
			
 
				-
			
 
				-		key.objectid = btrfs_header_owner(leaf);
			
 
				-		key.type = BTRFS_ROOT_ITEM_KEY;
			
 
				-		key.offset = (u64)-1;
			
 
				-
			
 
				-		check_root = btrfs_get_fs_root(fs_info, &key, false);
			
 
				-		/*
			
 
				-		 * The only reason we also check NULL here is that during
			
 
				-		 * open_ctree() some roots has not yet been set up.
			
 
				-		 */
			
 
				-		if (!IS_ERR_OR_NULL(check_root)) {
			
 
				-			struct extent_buffer *eb;
			
 
				-
			
 
				-			eb = btrfs_root_node(check_root);
			
 
				-			/* if leaf is the root, then it's fine */
			
 
				-			if (leaf != eb) {
			
 
				-				CORRUPT("non-root leaf's nritems is 0",
			
 
				-					leaf, check_root, 0);
			
 
				-				free_extent_buffer(eb);
			
 
				-				return -EIO;
			
 
				-			}
			
 
				-			free_extent_buffer(eb);
			
 
				-		}
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	if (nritems == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-	/* Check the 0 item */
			
 
				-	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
			
 
				-	    BTRFS_LEAF_DATA_SIZE(fs_info)) {
			
 
				-		CORRUPT("invalid item offset size pair", leaf, root, 0);
			
 
				-		return -EIO;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Check to make sure each items keys are in the correct order and their
			
 
				-	 * offsets make sense.  We only have to loop through nritems-1 because
			
 
				-	 * we check the current slot against the next slot, which verifies the
			
 
				-	 * next slot's offset+size makes sense and that the current's slot
			
 
				-	 * offset is correct.
			
 
				-	 */
			
 
				-	for (slot = 0; slot < nritems - 1; slot++) {
			
 
				-		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
			
 
				-		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
			
 
				-
			
 
				-		/* Make sure the keys are in the right order */
			
 
				-		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
			
 
				-			CORRUPT("bad key order", leaf, root, slot);
			
 
				-			return -EIO;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Make sure the offset and ends are right, remember that the
			
 
				-		 * item data starts at the end of the leaf and grows towards the
			
 
				-		 * front.
			
 
				-		 */
			
 
				-		if (btrfs_item_offset_nr(leaf, slot) !=
			
 
				-			btrfs_item_end_nr(leaf, slot + 1)) {
			
 
				-			CORRUPT("slot offset bad", leaf, root, slot);
			
 
				-			return -EIO;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Check to make sure that we don't point outside of the leaf,
			
 
				-		 * just in case all the items are consistent to each other, but
			
 
				-		 * all point outside of the leaf.
			
 
				-		 */
			
 
				-		if (btrfs_item_end_nr(leaf, slot) >
			
 
				-		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
			
 
				-			CORRUPT("slot end outside of leaf", leaf, root, slot);
			
 
				-			return -EIO;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
			
 
				-{
			
 
				-	unsigned long nr = btrfs_header_nritems(node);
			
 
				-	struct btrfs_key key, next_key;
			
 
				-	int slot;
			
 
				-	u64 bytenr;
			
 
				-	int ret = 0;
			
 
				-
			
 
				-	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
			
 
				-		btrfs_crit(root->fs_info,
			
 
				-			   "corrupt node: block %llu root %llu nritems %lu",
			
 
				-			   node->start, root->objectid, nr);
			
 
				-		return -EIO;
			
 
				-	}
			
 
				-
			
 
				-	for (slot = 0; slot < nr - 1; slot++) {
			
 
				-		bytenr = btrfs_node_blockptr(node, slot);
			
 
				-		btrfs_node_key_to_cpu(node, &key, slot);
			
 
				-		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
			
 
				-
			
 
				-		if (!bytenr) {
			
 
				-			CORRUPT("invalid item slot", node, root, slot);
			
 
				-			ret = -EIO;
			
 
				-			goto out;
			
 
				-		}
			
 
				-
			
 
				-		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
			
 
				-			CORRUPT("bad key order", node, root, slot);
			
 
				-			ret = -EIO;
			
 
				-			goto out;
			
 
				-		}
			
 
				-	}
			
 
				-out:
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
			
 
				 				      u64 phy_offset, struct page *page,
			
 
				 				      u64 start, u64 end, int mirror)
			
@@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
				 	 * that we don't try and read the other copies of this block, just
			
 
				 	 * return -EIO.
			
 
				 	 */
			
 
				-	if (found_level == 0 && check_leaf(root, eb)) {
			
 
				+	if (found_level == 0 && btrfs_check_leaf(root, eb)) {
			
 
				 		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
			
 
				 		ret = -EIO;
			
 
				 	}
			
 
				 
			
 
				-	if (found_level > 0 && check_node(root, eb))
			
 
				+	if (found_level > 0 && btrfs_check_node(root, eb))
			
 
				 		ret = -EIO;
			
 
				 
			
 
				 	if (!ret)
			
@@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work)
 
				 
			
 
				 static void run_one_async_done(struct btrfs_work *work)
			
 
				 {
			
 
				-	struct btrfs_fs_info *fs_info;
			
 
				 	struct async_submit_bio *async;
			
 
				-	int limit;
			
 
				 
			
 
				 	async = container_of(work, struct  async_submit_bio, work);
			
 
				-	fs_info = async->fs_info;
			
 
				-
			
 
				-	limit = btrfs_async_submit_limit(fs_info);
			
 
				-	limit = limit * 2 / 3;
			
 
				-
			
 
				-	/*
			
 
				-	 * atomic_dec_return implies a barrier for waitqueue_active
			
 
				-	 */
			
 
				-	if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
			
 
				-	    waitqueue_active(&fs_info->async_submit_wait))
			
 
				-		wake_up(&fs_info->async_submit_wait);
			
 
				 
			
 
				 	/* If an error occurred we just want to clean up the bio and move on */
			
 
				 	if (async->status) {
			
@@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
				 
			
 
				 	async->status = 0;
			
 
				 
			
 
				-	atomic_inc(&fs_info->nr_async_submits);
			
 
				-
			
 
				 	if (op_is_sync(bio->bi_opf))
			
 
				 		btrfs_set_work_high_priority(&async->work);
			
 
				 
			
 
				 	btrfs_queue_work(fs_info->workers, &async->work);
			
 
				-
			
 
				-	while (atomic_read(&fs_info->async_submit_draining) &&
			
 
				-	      atomic_read(&fs_info->nr_async_submits)) {
			
 
				-		wait_event(fs_info->async_submit_wait,
			
 
				-			   (atomic_read(&fs_info->nr_async_submits) == 0));
			
 
				-	}
			
 
				-
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int check_async_write(unsigned long bio_flags)
			
 
				+static int check_async_write(struct btrfs_inode *bi)
			
 
				 {
			
 
				-	if (bio_flags & EXTENT_BIO_TREE_LOG)
			
 
				+	if (atomic_read(&bi->sync_writers))
			
 
				 		return 0;
			
 
				 #ifdef CONFIG_X86
			
 
				 	if (static_cpu_has(X86_FEATURE_XMM4_2))
			
@@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
 
				 {
			
 
				 	struct inode *inode = private_data;
			
 
				 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
			
 
				-	int async = check_async_write(bio_flags);
			
 
				+	int async = check_async_write(BTRFS_I(inode));
			
 
				 	blk_status_t ret;
			
 
				 
			
 
				 	if (bio_op(bio) != REQ_OP_WRITE) {
			
@@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb,
 
				 		goto fail_delalloc_bytes;
			
 
				 	}
			
 
				 
			
 
				-	fs_info->btree_inode = new_inode(sb);
			
 
				-	if (!fs_info->btree_inode) {
			
 
				-		err = -ENOMEM;
			
 
				-		goto fail_bio_counter;
			
 
				-	}
			
 
				-
			
 
				-	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
			
 
				-
			
 
				 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
			
 
				 	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
			
 
				 	INIT_LIST_HEAD(&fs_info->trans_list);
			
@@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb,
 
				 	btrfs_mapping_init(&fs_info->mapping_tree);
			
 
				 	btrfs_init_block_rsv(&fs_info->global_block_rsv,
			
 
				 			     BTRFS_BLOCK_RSV_GLOBAL);
			
 
				-	btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
			
 
				-			     BTRFS_BLOCK_RSV_DELALLOC);
			
 
				 	btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
			
 
				 	btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
			
 
				 	btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
			
 
				 	btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
			
 
				 			     BTRFS_BLOCK_RSV_DELOPS);
			
 
				-	atomic_set(&fs_info->nr_async_submits, 0);
			
 
				 	atomic_set(&fs_info->async_delalloc_pages, 0);
			
 
				-	atomic_set(&fs_info->async_submit_draining, 0);
			
 
				-	atomic_set(&fs_info->nr_async_bios, 0);
			
 
				 	atomic_set(&fs_info->defrag_running, 0);
			
 
				 	atomic_set(&fs_info->qgroup_op_seq, 0);
			
 
				 	atomic_set(&fs_info->reada_works_cnt, 0);
			
@@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb,
 
				 	/* readahead state */
			
 
				 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
			
 
				 	spin_lock_init(&fs_info->reada_lock);
			
 
				+	btrfs_init_ref_verify(fs_info);
			
 
				 
			
 
				 	fs_info->thread_pool_size = min_t(unsigned long,
			
 
				 					  num_online_cpus() + 2, 8);
			
 
				 
			
 
				 	INIT_LIST_HEAD(&fs_info->ordered_roots);
			
 
				 	spin_lock_init(&fs_info->ordered_root_lock);
			
 
				+
			
 
				+	fs_info->btree_inode = new_inode(sb);
			
 
				+	if (!fs_info->btree_inode) {
			
 
				+		err = -ENOMEM;
			
 
				+		goto fail_bio_counter;
			
 
				+	}
			
 
				+	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
			
 
				+
			
 
				 	fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
			
 
				 					GFP_KERNEL);
			
 
				 	if (!fs_info->delayed_root) {
			
@@ -2895,12 +2731,13 @@ int open_ctree(struct super_block *sb,
 
				 	sb->s_bdi->congested_fn = btrfs_congested_fn;
			
 
				 	sb->s_bdi->congested_data = fs_info;
			
 
				 	sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
			
 
				-	sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
			
 
				+	sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
			
 
				 	sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
			
 
				 	sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
			
 
				 
			
 
				 	sb->s_blocksize = sectorsize;
			
 
				 	sb->s_blocksize_bits = blksize_bits(sectorsize);
			
 
				+	memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
			
 
				 
			
 
				 	mutex_lock(&fs_info->chunk_mutex);
			
 
				 	ret = btrfs_read_sys_array(fs_info);
			
@@ -3083,6 +2920,9 @@ retry_root_backup:
 
				 	if (ret)
			
 
				 		goto fail_trans_kthread;
			
 
				 
			
 
				+	if (btrfs_build_ref_tree(fs_info))
			
 
				+		btrfs_err(fs_info, "couldn't build ref tree");
			
 
				+
			
 
				 	/* do not make disk changes in broken FS or nologreplay is given */
			
 
				 	if (btrfs_super_log_root(disk_super) != 0 &&
			
 
				 	    !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
			
@@ -3948,6 +3788,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
				 	cleanup_srcu_struct(&fs_info->subvol_srcu);
			
 
				 
			
 
				 	btrfs_free_stripe_hash_table(fs_info);
			
 
				+	btrfs_free_ref_cache(fs_info);
			
 
				 
			
 
				 	__btrfs_free_block_rsv(root->orphan_block_rsv);
			
 
				 	root->orphan_block_rsv = NULL;
			
@@ -4007,7 +3848,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 
				 					 buf->len,
			
 
				 					 fs_info->dirty_metadata_batch);
			
 
				 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			
 
				-	if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
			
 
				+	if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
			
 
				 		btrfs_print_leaf(buf);
			
 
				 		ASSERT(0);
			
 
				 	}
			
@@ -4272,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
				 
			
 
				 	while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
			
 
				 		struct btrfs_delayed_ref_head *head;
			
 
				-		struct btrfs_delayed_ref_node *tmp;
			
 
				+		struct rb_node *n;
			
 
				 		bool pin_bytes = false;
			
 
				 
			
 
				 		head = rb_entry(node, struct btrfs_delayed_ref_head,
			
 
				 				href_node);
			
 
				 		if (!mutex_trylock(&head->mutex)) {
			
 
				-			refcount_inc(&head->node.refs);
			
 
				+			refcount_inc(&head->refs);
			
 
				 			spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 			mutex_lock(&head->mutex);
			
 
				 			mutex_unlock(&head->mutex);
			
 
				-			btrfs_put_delayed_ref(&head->node);
			
 
				+			btrfs_put_delayed_ref_head(head);
			
 
				 			spin_lock(&delayed_refs->lock);
			
 
				 			continue;
			
 
				 		}
			
 
				 		spin_lock(&head->lock);
			
 
				-		list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
			
 
				-						 list) {
			
 
				+		while ((n = rb_first(&head->ref_tree)) != NULL) {
			
 
				+			ref = rb_entry(n, struct btrfs_delayed_ref_node,
			
 
				+				       ref_node);
			
 
				 			ref->in_tree = 0;
			
 
				-			list_del(&ref->list);
			
 
				+			rb_erase(&ref->ref_node, &head->ref_tree);
			
 
				+			RB_CLEAR_NODE(&ref->ref_node);
			
 
				 			if (!list_empty(&ref->add_list))
			
 
				 				list_del(&ref->add_list);
			
 
				 			atomic_dec(&delayed_refs->num_entries);
			
@@ -4304,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
				 		if (head->processing == 0)
			
 
				 			delayed_refs->num_heads_ready--;
			
 
				 		atomic_dec(&delayed_refs->num_entries);
			
 
				-		head->node.in_tree = 0;
			
 
				 		rb_erase(&head->href_node, &delayed_refs->href_root);
			
 
				+		RB_CLEAR_NODE(&head->href_node);
			
 
				 		spin_unlock(&head->lock);
			
 
				 		spin_unlock(&delayed_refs->lock);
			
 
				 		mutex_unlock(&head->mutex);
			
 
				 
			
 
				 		if (pin_bytes)
			
 
				-			btrfs_pin_extent(fs_info, head->node.bytenr,
			
 
				-					 head->node.num_bytes, 1);
			
 
				-		btrfs_put_delayed_ref(&head->node);
			
 
				+			btrfs_pin_extent(fs_info, head->bytenr,
			
 
				+					 head->num_bytes, 1);
			
 
				+		btrfs_put_delayed_ref_head(head);
			
 
				 		cond_resched();
			
 
				 		spin_lock(&delayed_refs->lock);
			
 
				 	}
			
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -26,6 +26,7 @@
 
				 #include <linux/slab.h>
			
 
				 #include <linux/ratelimit.h>
			
 
				 #include <linux/percpu_counter.h>
			
 
				+#include <linux/lockdep.h>
			
 
				 #include "hash.h"
			
 
				 #include "tree-log.h"
			
 
				 #include "disk-io.h"
			
@@ -38,6 +39,7 @@
 
				 #include "math.h"
			
 
				 #include "sysfs.h"
			
 
				 #include "qgroup.h"
			
 
				+#include "ref-verify.h"
			
 
				 
			
 
				 #undef SCRAMBLE_DELAYED_REFS
			
 
				 
			
@@ -61,9 +63,6 @@ enum {
 
				 	CHUNK_ALLOC_FORCE = 2,
			
 
				 };
			
 
				 
			
 
				-static int update_block_group(struct btrfs_trans_handle *trans,
			
 
				-			      struct btrfs_fs_info *fs_info, u64 bytenr,
			
 
				-			      u64 num_bytes, int alloc);
			
 
				 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			
 
				 			       struct btrfs_fs_info *fs_info,
			
 
				 				struct btrfs_delayed_ref_node *node, u64 parent,
			
@@ -91,17 +90,8 @@ static int find_next_key(struct btrfs_path *path, int level,
 
				 static void dump_space_info(struct btrfs_fs_info *fs_info,
			
 
				 			    struct btrfs_space_info *info, u64 bytes,
			
 
				 			    int dump_block_groups);
			
 
				-static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
			
 
				-				    u64 ram_bytes, u64 num_bytes, int delalloc);
			
 
				-static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
			
 
				-				     u64 num_bytes, int delalloc);
			
 
				 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			
 
				 			       u64 num_bytes);
			
 
				-static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
			
 
				-				    struct btrfs_space_info *space_info,
			
 
				-				    u64 orig_bytes,
			
 
				-				    enum btrfs_reserve_flush_enum flush,
			
 
				-				    bool system_chunk);
			
 
				 static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
			
 
				 				     struct btrfs_space_info *space_info,
			
 
				 				     u64 num_bytes);
			
@@ -652,7 +642,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
				 	cache->cached = BTRFS_CACHE_FAST;
			
 
				 	spin_unlock(&cache->lock);
			
 
				 
			
 
				-	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
			
 
				+	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
			
 
				 		mutex_lock(&caching_ctl->mutex);
			
 
				 		ret = load_free_space_cache(fs_info, cache);
			
 
				 
			
@@ -923,7 +913,7 @@ search_again:
 
				 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
			
 
				 	if (head) {
			
 
				 		if (!mutex_trylock(&head->mutex)) {
			
 
				-			refcount_inc(&head->node.refs);
			
 
				+			refcount_inc(&head->refs);
			
 
				 			spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 			btrfs_release_path(path);
			
@@ -934,7 +924,7 @@ search_again:
 
				 			 */
			
 
				 			mutex_lock(&head->mutex);
			
 
				 			mutex_unlock(&head->mutex);
			
 
				-			btrfs_put_delayed_ref(&head->node);
			
 
				+			btrfs_put_delayed_ref_head(head);
			
 
				 			goto search_again;
			
 
				 		}
			
 
				 		spin_lock(&head->lock);
			
@@ -943,7 +933,7 @@ search_again:
 
				 		else
			
 
				 			BUG_ON(num_refs == 0);
			
 
				 
			
 
				-		num_refs += head->node.ref_mod;
			
 
				+		num_refs += head->ref_mod;
			
 
				 		spin_unlock(&head->lock);
			
 
				 		mutex_unlock(&head->mutex);
			
 
				 	}
			
@@ -2189,16 +2179,20 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 
				 
			
 
				 /* Can return -ENOMEM */
			
 
				 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			
 
				-			 struct btrfs_fs_info *fs_info,
			
 
				+			 struct btrfs_root *root,
			
 
				 			 u64 bytenr, u64 num_bytes, u64 parent,
			
 
				 			 u64 root_objectid, u64 owner, u64 offset)
			
 
				 {
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				 	int old_ref_mod, new_ref_mod;
			
 
				 	int ret;
			
 
				 
			
 
				 	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
			
 
				 	       root_objectid == BTRFS_TREE_LOG_OBJECTID);
			
 
				 
			
 
				+	btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
			
 
				+			   owner, offset, BTRFS_ADD_DELAYED_REF);
			
 
				+
			
 
				 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
			
 
				 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
			
 
				 						 num_bytes, parent,
			
@@ -2344,7 +2338,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 
				 
			
 
				 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
			
 
				 				 struct btrfs_fs_info *fs_info,
			
 
				-				 struct btrfs_delayed_ref_node *node,
			
 
				+				 struct btrfs_delayed_ref_head *head,
			
 
				 				 struct btrfs_delayed_extent_op *extent_op)
			
 
				 {
			
 
				 	struct btrfs_key key;
			
@@ -2366,14 +2360,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 
				 	if (!path)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	key.objectid = node->bytenr;
			
 
				+	key.objectid = head->bytenr;
			
 
				 
			
 
				 	if (metadata) {
			
 
				 		key.type = BTRFS_METADATA_ITEM_KEY;
			
 
				 		key.offset = extent_op->level;
			
 
				 	} else {
			
 
				 		key.type = BTRFS_EXTENT_ITEM_KEY;
			
 
				-		key.offset = node->num_bytes;
			
 
				+		key.offset = head->num_bytes;
			
 
				 	}
			
 
				 
			
 
				 again:
			
@@ -2390,17 +2384,17 @@ again:
 
				 				path->slots[0]--;
			
 
				 				btrfs_item_key_to_cpu(path->nodes[0], &key,
			
 
				 						      path->slots[0]);
			
 
				-				if (key.objectid == node->bytenr &&
			
 
				+				if (key.objectid == head->bytenr &&
			
 
				 				    key.type == BTRFS_EXTENT_ITEM_KEY &&
			
 
				-				    key.offset == node->num_bytes)
			
 
				+				    key.offset == head->num_bytes)
			
 
				 					ret = 0;
			
 
				 			}
			
 
				 			if (ret > 0) {
			
 
				 				btrfs_release_path(path);
			
 
				 				metadata = 0;
			
 
				 
			
 
				-				key.objectid = node->bytenr;
			
 
				-				key.offset = node->num_bytes;
			
 
				+				key.objectid = head->bytenr;
			
 
				+				key.offset = head->num_bytes;
			
 
				 				key.type = BTRFS_EXTENT_ITEM_KEY;
			
 
				 				goto again;
			
 
				 			}
			
@@ -2507,44 +2501,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				-	if (btrfs_delayed_ref_is_head(node)) {
			
 
				-		struct btrfs_delayed_ref_head *head;
			
 
				-		/*
			
 
				-		 * we've hit the end of the chain and we were supposed
			
 
				-		 * to insert this extent into the tree.  But, it got
			
 
				-		 * deleted before we ever needed to insert it, so all
			
 
				-		 * we have to do is clean up the accounting
			
 
				-		 */
			
 
				-		BUG_ON(extent_op);
			
 
				-		head = btrfs_delayed_node_to_head(node);
			
 
				-		trace_run_delayed_ref_head(fs_info, node, head, node->action);
			
 
				-
			
 
				-		if (head->total_ref_mod < 0) {
			
 
				-			struct btrfs_block_group_cache *cache;
			
 
				-
			
 
				-			cache = btrfs_lookup_block_group(fs_info, node->bytenr);
			
 
				-			ASSERT(cache);
			
 
				-			percpu_counter_add(&cache->space_info->total_bytes_pinned,
			
 
				-					   -node->num_bytes);
			
 
				-			btrfs_put_block_group(cache);
			
 
				-		}
			
 
				-
			
 
				-		if (insert_reserved) {
			
 
				-			btrfs_pin_extent(fs_info, node->bytenr,
			
 
				-					 node->num_bytes, 1);
			
 
				-			if (head->is_data) {
			
 
				-				ret = btrfs_del_csums(trans, fs_info,
			
 
				-						      node->bytenr,
			
 
				-						      node->num_bytes);
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		/* Also free its reserved qgroup space */
			
 
				-		btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
			
 
				-					      head->qgroup_reserved);
			
 
				-		return ret;
			
 
				-	}
			
 
				-
			
 
				 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
			
 
				 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
			
 
				 		ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
			
@@ -2563,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 
				 {
			
 
				 	struct btrfs_delayed_ref_node *ref;
			
 
				 
			
 
				-	if (list_empty(&head->ref_list))
			
 
				+	if (RB_EMPTY_ROOT(&head->ref_tree))
			
 
				 		return NULL;
			
 
				 
			
 
				 	/*
			
@@ -2576,12 +2532,114 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 
				 		return list_first_entry(&head->ref_add_list,
			
 
				 				struct btrfs_delayed_ref_node, add_list);
			
 
				 
			
 
				-	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
			
 
				-			       list);
			
 
				+	ref = rb_entry(rb_first(&head->ref_tree),
			
 
				+		       struct btrfs_delayed_ref_node, ref_node);
			
 
				 	ASSERT(list_empty(&ref->add_list));
			
 
				 	return ref;
			
 
				 }
			
 
				 
			
 
				+static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			
 
				+				      struct btrfs_delayed_ref_head *head)
			
 
				+{	/*
				+	 * Hand a selected ref head back without finishing it: under
				+	 * delayed_refs->lock clear head->processing and bump
				+	 * num_heads_ready, then release the head with
				+	 * btrfs_delayed_ref_unlock().  Every caller visible in this file
				+	 * has already dropped head->lock before calling in.
				+	 */
			
 
				+	spin_lock(&delayed_refs->lock);
			
 
				+	head->processing = 0;
			
 
				+	delayed_refs->num_heads_ready++;
			
 
				+	spin_unlock(&delayed_refs->lock);
			
 
				+	btrfs_delayed_ref_unlock(head);	/* done outside delayed_refs->lock */
			
 
				+}
			
 
				+
			
 
				+static int cleanup_extent_op(struct btrfs_trans_handle *trans,
			
 
				+			     struct btrfs_fs_info *fs_info,
			
 
				+			     struct btrfs_delayed_ref_head *head)
			
 
				+{	/*
				+	 * Detach head->extent_op and deal with it.  Entered with
				+	 * head->lock held.
				+	 *
				+	 * Returns 0 with head->lock still held when there is no extent op,
				+	 * or when the op is simply freed because
				+	 * head->must_insert_reserved is set.  Otherwise head->lock is
				+	 * dropped (and not retaken) before run_delayed_extent_op() runs;
				+	 * a nonzero result from it is returned unchanged, and a zero
				+	 * result is reported as 1 so the caller knows the lock is gone.
				+	 */
			
 
				+	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
			
 
				+	int ret;
			
 
				+
			
 
				+	if (!extent_op)
			
 
				+		return 0;
			
 
				+	head->extent_op = NULL;	/* the op is ours now; it is freed on every path below */
			
 
				+	if (head->must_insert_reserved) {
			
 
				+		btrfs_free_delayed_extent_op(extent_op);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	spin_unlock(&head->lock);	/* not retaken before returning */
			
 
				+	ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
			
 
				+	btrfs_free_delayed_extent_op(extent_op);
			
 
				+	return ret ? ret : 1;	/* never 0 on this path: head->lock was dropped */
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Finish a delayed ref head once all of its individual refs have been run.
				+ *
				+ * Called with head->lock held.  Runs any pending extent op, re-checks under
				+ * delayed_refs->lock that nothing new was queued on the head, removes the head
				+ * from the href tree, fixes up the pinned-bytes / pending-csum accounting,
				+ * pins the extent and deletes its csums when must_insert_reserved is set,
				+ * frees the qgroup reservation and finally unlocks and puts the head.
				+ *
				+ * Return values:
				+ *   > 0  head->lock was dropped (an extent op was run, or new refs / a new
				+ *        extent op showed up); the head is still selected and the caller
				+ *        must loop and look at it again.
				+ *   0    the head was fully cleaned up and released; the caller must not
				+ *        touch it any more.
				+ *   < 0  error.  Either run_delayed_extent_op() failed (the head was
				+ *        unselected and stays in the tree) or btrfs_del_csums() failed (the
				+ *        head has already been removed, unlocked and put).
				+ */
				+static int cleanup_ref_head(struct btrfs_trans_handle *trans,
				+			    struct btrfs_fs_info *fs_info,
				+			    struct btrfs_delayed_ref_head *head)
				+{
				+	struct btrfs_delayed_ref_root *delayed_refs;
				+	int ret;
				+
				+	delayed_refs = &trans->transaction->delayed_refs;
				+
				+	ret = cleanup_extent_op(trans, fs_info, head);
				+	if (ret < 0) {
				+		unselect_delayed_ref_head(delayed_refs, head);
				+		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
				+		return ret;
				+	} else if (ret) {
				+		return ret;
				+	}
				+
				+	/*
				+	 * Need to drop our head ref lock and re-acquire the delayed ref lock
				+	 * and then re-check to make sure nobody got added.
				+	 */
				+	spin_unlock(&head->lock);
				+	spin_lock(&delayed_refs->lock);
				+	spin_lock(&head->lock);
				+	if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
				+		spin_unlock(&head->lock);
				+		spin_unlock(&delayed_refs->lock);
				+		return 1;
				+	}
				+	delayed_refs->num_heads--;
				+	rb_erase(&head->href_node, &delayed_refs->href_root);
				+	RB_CLEAR_NODE(&head->href_node);
				+	spin_unlock(&delayed_refs->lock);
				+	spin_unlock(&head->lock);
				+	atomic_dec(&delayed_refs->num_entries);
				+
				+	trace_run_delayed_ref_head(fs_info, head, 0);
				+
				+	if (head->total_ref_mod < 0) {
				+		struct btrfs_block_group_cache *cache;
				+
				+		cache = btrfs_lookup_block_group(fs_info, head->bytenr);
				+		ASSERT(cache);
				+		percpu_counter_add(&cache->space_info->total_bytes_pinned,
				+				   -head->num_bytes);
				+		btrfs_put_block_group(cache);
				+
				+		if (head->is_data) {
				+			spin_lock(&delayed_refs->lock);
				+			delayed_refs->pending_csums -= head->num_bytes;
				+			spin_unlock(&delayed_refs->lock);
				+		}
				+	}
				+
				+	if (head->must_insert_reserved) {
				+		btrfs_pin_extent(fs_info, head->bytenr,
				+				 head->num_bytes, 1);
				+		if (head->is_data) {
				+			ret = btrfs_del_csums(trans, fs_info, head->bytenr,
				+					      head->num_bytes);
				+		}
				+	}
				+
				+	/* Also free its reserved qgroup space */
				+	btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
				+				      head->qgroup_reserved);
				+	btrfs_delayed_ref_unlock(head);
				+	btrfs_put_delayed_ref_head(head);
				+
				+	/*
				+	 * ret is 0 here unless btrfs_del_csums() failed above; hand that
				+	 * failure to the caller instead of silently dropping it, as the
				+	 * head handling in run_one_delayed_ref() used to do.
				+	 * NOTE(review): assumes btrfs_del_csums() never returns > 0, which
				+	 * the caller would read as "retry this head" - confirm.
				+	 */
				+	return ret;
				+}
			
 
				+
			
 
				 /*
			
 
				  * Returns 0 on success or if called with an already aborted transaction.
			
 
				  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
			
@@ -2655,11 +2713,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
				 		if (ref && ref->seq &&
			
 
				 		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
			
 
				 			spin_unlock(&locked_ref->lock);
			
 
				-			spin_lock(&delayed_refs->lock);
			
 
				-			locked_ref->processing = 0;
			
 
				-			delayed_refs->num_heads_ready++;
			
 
				-			spin_unlock(&delayed_refs->lock);
			
 
				-			btrfs_delayed_ref_unlock(locked_ref);
			
 
				+			unselect_delayed_ref_head(delayed_refs, locked_ref);
			
 
				 			locked_ref = NULL;
			
 
				 			cond_resched();
			
 
				 			count++;
			
@@ -2667,102 +2721,55 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
				 		}
			
 
				 
			
 
				 		/*
			
 
				-		 * record the must insert reserved flag before we
			
 
				-		 * drop the spin lock.
			
 
				+		 * We're done processing refs in this ref_head, clean everything
			
 
				+		 * up and move on to the next ref_head.
			
 
				 		 */
			
 
				-		must_insert_reserved = locked_ref->must_insert_reserved;
			
 
				-		locked_ref->must_insert_reserved = 0;
			
 
				-
			
 
				-		extent_op = locked_ref->extent_op;
			
 
				-		locked_ref->extent_op = NULL;
			
 
				-
			
 
				 		if (!ref) {
			
 
				-
			
 
				-
			
 
				-			/* All delayed refs have been processed, Go ahead
			
 
				-			 * and send the head node to run_one_delayed_ref,
			
 
				-			 * so that any accounting fixes can happen
			
 
				-			 */
			
 
				-			ref = &locked_ref->node;
			
 
				-
			
 
				-			if (extent_op && must_insert_reserved) {
			
 
				-				btrfs_free_delayed_extent_op(extent_op);
			
 
				-				extent_op = NULL;
			
 
				-			}
			
 
				-
			
 
				-			if (extent_op) {
			
 
				-				spin_unlock(&locked_ref->lock);
			
 
				-				ret = run_delayed_extent_op(trans, fs_info,
			
 
				-							    ref, extent_op);
			
 
				-				btrfs_free_delayed_extent_op(extent_op);
			
 
				-
			
 
				-				if (ret) {
			
 
				-					/*
			
 
				-					 * Need to reset must_insert_reserved if
			
 
				-					 * there was an error so the abort stuff
			
 
				-					 * can cleanup the reserved space
			
 
				-					 * properly.
			
 
				-					 */
			
 
				-					if (must_insert_reserved)
			
 
				-						locked_ref->must_insert_reserved = 1;
			
 
				-					spin_lock(&delayed_refs->lock);
			
 
				-					locked_ref->processing = 0;
			
 
				-					delayed_refs->num_heads_ready++;
			
 
				-					spin_unlock(&delayed_refs->lock);
			
 
				-					btrfs_debug(fs_info,
			
 
				-						    "run_delayed_extent_op returned %d",
			
 
				-						    ret);
			
 
				-					btrfs_delayed_ref_unlock(locked_ref);
			
 
				-					return ret;
			
 
				-				}
			
 
				+			ret = cleanup_ref_head(trans, fs_info, locked_ref);
			
 
				+			if (ret > 0 ) {
			
 
				+				/* We dropped our lock, we need to loop. */
			
 
				+				ret = 0;
			
 
				 				continue;
			
 
				+			} else if (ret) {
			
 
				+				return ret;
			
 
				 			}
			
 
				+			locked_ref = NULL;
			
 
				+			count++;
			
 
				+			continue;
			
 
				+		}
			
 
				 
			
 
				-			/*
			
 
				-			 * Need to drop our head ref lock and re-acquire the
			
 
				-			 * delayed ref lock and then re-check to make sure
			
 
				-			 * nobody got added.
			
 
				-			 */
			
 
				-			spin_unlock(&locked_ref->lock);
			
 
				-			spin_lock(&delayed_refs->lock);
			
 
				-			spin_lock(&locked_ref->lock);
			
 
				-			if (!list_empty(&locked_ref->ref_list) ||
			
 
				-			    locked_ref->extent_op) {
			
 
				-				spin_unlock(&locked_ref->lock);
			
 
				-				spin_unlock(&delayed_refs->lock);
			
 
				-				continue;
			
 
				-			}
			
 
				-			ref->in_tree = 0;
			
 
				-			delayed_refs->num_heads--;
			
 
				-			rb_erase(&locked_ref->href_node,
			
 
				-				 &delayed_refs->href_root);
			
 
				-			spin_unlock(&delayed_refs->lock);
			
 
				-		} else {
			
 
				-			actual_count++;
			
 
				-			ref->in_tree = 0;
			
 
				-			list_del(&ref->list);
			
 
				-			if (!list_empty(&ref->add_list))
			
 
				-				list_del(&ref->add_list);
			
 
				+		actual_count++;
			
 
				+		ref->in_tree = 0;
			
 
				+		rb_erase(&ref->ref_node, &locked_ref->ref_tree);
			
 
				+		RB_CLEAR_NODE(&ref->ref_node);
			
 
				+		if (!list_empty(&ref->add_list))
			
 
				+			list_del(&ref->add_list);
			
 
				+		/*
			
 
				+		 * When we play the delayed ref, also correct the ref_mod on
			
 
				+		 * head
			
 
				+		 */
			
 
				+		switch (ref->action) {
			
 
				+		case BTRFS_ADD_DELAYED_REF:
			
 
				+		case BTRFS_ADD_DELAYED_EXTENT:
			
 
				+			locked_ref->ref_mod -= ref->ref_mod;
			
 
				+			break;
			
 
				+		case BTRFS_DROP_DELAYED_REF:
			
 
				+			locked_ref->ref_mod += ref->ref_mod;
			
 
				+			break;
			
 
				+		default:
			
 
				+			WARN_ON(1);
			
 
				 		}
			
 
				 		atomic_dec(&delayed_refs->num_entries);
			
 
				 
			
 
				-		if (!btrfs_delayed_ref_is_head(ref)) {
			
 
				-			/*
			
 
				-			 * when we play the delayed ref, also correct the
			
 
				-			 * ref_mod on head
			
 
				-			 */
			
 
				-			switch (ref->action) {
			
 
				-			case BTRFS_ADD_DELAYED_REF:
			
 
				-			case BTRFS_ADD_DELAYED_EXTENT:
			
 
				-				locked_ref->node.ref_mod -= ref->ref_mod;
			
 
				-				break;
			
 
				-			case BTRFS_DROP_DELAYED_REF:
			
 
				-				locked_ref->node.ref_mod += ref->ref_mod;
			
 
				-				break;
			
 
				-			default:
			
 
				-				WARN_ON(1);
			
 
				-			}
			
 
				-		}
			
 
				+		/*
			
 
				+		 * Record the must-insert_reserved flag before we drop the spin
			
 
				+		 * lock.
			
 
				+		 */
			
 
				+		must_insert_reserved = locked_ref->must_insert_reserved;
			
 
				+		locked_ref->must_insert_reserved = 0;
			
 
				+
			
 
				+		extent_op = locked_ref->extent_op;
			
 
				+		locked_ref->extent_op = NULL;
			
 
				 		spin_unlock(&locked_ref->lock);
			
 
				 
			
 
				 		ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
			
@@ -2770,33 +2777,13 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
				 
			
 
				 		btrfs_free_delayed_extent_op(extent_op);
			
 
				 		if (ret) {
			
 
				-			spin_lock(&delayed_refs->lock);
			
 
				-			locked_ref->processing = 0;
			
 
				-			delayed_refs->num_heads_ready++;
			
 
				-			spin_unlock(&delayed_refs->lock);
			
 
				-			btrfs_delayed_ref_unlock(locked_ref);
			
 
				+			unselect_delayed_ref_head(delayed_refs, locked_ref);
			
 
				 			btrfs_put_delayed_ref(ref);
			
 
				 			btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
			
 
				 				    ret);
			
 
				 			return ret;
			
 
				 		}
			
 
				 
			
 
				-		/*
			
 
				-		 * If this node is a head, that means all the refs in this head
			
 
				-		 * have been dealt with, and we will pick the next head to deal
			
 
				-		 * with, so we must unlock the head and drop it from the cluster
			
 
				-		 * list before we release it.
			
 
				-		 */
			
 
				-		if (btrfs_delayed_ref_is_head(ref)) {
			
 
				-			if (locked_ref->is_data &&
			
 
				-			    locked_ref->total_ref_mod < 0) {
			
 
				-				spin_lock(&delayed_refs->lock);
			
 
				-				delayed_refs->pending_csums -= ref->num_bytes;
			
 
				-				spin_unlock(&delayed_refs->lock);
			
 
				-			}
			
 
				-			btrfs_delayed_ref_unlock(locked_ref);
			
 
				-			locked_ref = NULL;
			
 
				-		}
			
 
				 		btrfs_put_delayed_ref(ref);
			
 
				 		count++;
			
 
				 		cond_resched();
			
@@ -3100,33 +3087,16 @@ again:
 
				 			spin_unlock(&delayed_refs->lock);
			
 
				 			goto out;
			
 
				 		}
			
 
				+		head = rb_entry(node, struct btrfs_delayed_ref_head,
			
 
				+				href_node);
			
 
				+		refcount_inc(&head->refs);
			
 
				+		spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				-		while (node) {
			
 
				-			head = rb_entry(node, struct btrfs_delayed_ref_head,
			
 
				-					href_node);
			
 
				-			if (btrfs_delayed_ref_is_head(&head->node)) {
			
 
				-				struct btrfs_delayed_ref_node *ref;
			
 
				-
			
 
				-				ref = &head->node;
			
 
				-				refcount_inc(&ref->refs);
			
 
				-
			
 
				-				spin_unlock(&delayed_refs->lock);
			
 
				-				/*
			
 
				-				 * Mutex was contended, block until it's
			
 
				-				 * released and try again
			
 
				-				 */
			
 
				-				mutex_lock(&head->mutex);
			
 
				-				mutex_unlock(&head->mutex);
			
 
				+		/* Mutex was contended, block until it's released and retry. */
			
 
				+		mutex_lock(&head->mutex);
			
 
				+		mutex_unlock(&head->mutex);
			
 
				 
			
 
				-				btrfs_put_delayed_ref(ref);
			
 
				-				cond_resched();
			
 
				-				goto again;
			
 
				-			} else {
			
 
				-				WARN_ON(1);
			
 
				-			}
			
 
				-			node = rb_next(node);
			
 
				-		}
			
 
				-		spin_unlock(&delayed_refs->lock);
			
 
				+		btrfs_put_delayed_ref_head(head);
			
 
				 		cond_resched();
			
 
				 		goto again;
			
 
				 	}
			
@@ -3169,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 
				 	struct btrfs_delayed_data_ref *data_ref;
			
 
				 	struct btrfs_delayed_ref_root *delayed_refs;
			
 
				 	struct btrfs_transaction *cur_trans;
			
 
				+	struct rb_node *node;
			
 
				 	int ret = 0;
			
 
				 
			
 
				 	cur_trans = root->fs_info->running_transaction;
			
@@ -3184,7 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 
				 	}
			
 
				 
			
 
				 	if (!mutex_trylock(&head->mutex)) {
			
 
				-		refcount_inc(&head->node.refs);
			
 
				+		refcount_inc(&head->refs);
			
 
				 		spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 		btrfs_release_path(path);
			
@@ -3195,13 +3166,18 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 
				 		 */
			
 
				 		mutex_lock(&head->mutex);
			
 
				 		mutex_unlock(&head->mutex);
			
 
				-		btrfs_put_delayed_ref(&head->node);
			
 
				+		btrfs_put_delayed_ref_head(head);
			
 
				 		return -EAGAIN;
			
 
				 	}
			
 
				 	spin_unlock(&delayed_refs->lock);
			
 
				 
			
 
				 	spin_lock(&head->lock);
			
 
				-	list_for_each_entry(ref, &head->ref_list, list) {
			
 
				+	/*
			
 
				+	 * XXX: We should replace this with a proper search function in the
			
 
				+	 * future.
			
 
				+	 */
			
 
				+	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
			
 
				+		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
			
 
				 		/* If it's a shared ref we know a cross reference exists */
			
 
				 		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			
 
				 			ret = 1;
			
@@ -3351,7 +3327,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 
				 	int level;
			
 
				 	int ret = 0;
			
 
				 	int (*process_func)(struct btrfs_trans_handle *,
			
 
				-			    struct btrfs_fs_info *,
			
 
				+			    struct btrfs_root *,
			
 
				 			    u64, u64, u64, u64, u64, u64);
			
 
				 
			
 
				 
			
@@ -3391,7 +3367,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 
				 
			
 
				 			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			
 
				 			key.offset -= btrfs_file_extent_offset(buf, fi);
			
 
				-			ret = process_func(trans, fs_info, bytenr, num_bytes,
			
 
				+			ret = process_func(trans, root, bytenr, num_bytes,
			
 
				 					   parent, ref_root, key.objectid,
			
 
				 					   key.offset);
			
 
				 			if (ret)
			
@@ -3399,7 +3375,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 
				 		} else {
			
 
				 			bytenr = btrfs_node_blockptr(buf, i);
			
 
				 			num_bytes = fs_info->nodesize;
			
 
				-			ret = process_func(trans, fs_info, bytenr, num_bytes,
			
 
				+			ret = process_func(trans, root, bytenr, num_bytes,
			
 
				 					   parent, ref_root, level - 1, 0);
			
 
				 			if (ret)
			
 
				 				goto fail;
			
@@ -4843,7 +4819,6 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
 
				 static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
			
 
				 			    u64 orig, bool wait_ordered)
			
 
				 {
			
 
				-	struct btrfs_block_rsv *block_rsv;
			
 
				 	struct btrfs_space_info *space_info;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	u64 delalloc_bytes;
			
@@ -4859,8 +4834,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 
				 	to_reclaim = items * EXTENT_SIZE_PER_ITEM;
			
 
				 
			
 
				 	trans = (struct btrfs_trans_handle *)current->journal_info;
			
 
				-	block_rsv = &fs_info->delalloc_block_rsv;
			
 
				-	space_info = block_rsv->space_info;
			
 
				+	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
			
 
				 
			
 
				 	delalloc_bytes = percpu_counter_sum_positive(
			
 
				 						&fs_info->delalloc_bytes);
			
@@ -4919,6 +4893,13 @@ skip_async:
 
				 	}
			
 
				 }
			
 
				 
			
 
				+struct reserve_ticket {
			
 
				+	u64 bytes;
			
 
				+	int error;
			
 
				+	struct list_head list;
			
 
				+	wait_queue_head_t wait;
			
 
				+};
			
 
				+
			
 
				 /**
			
 
				  * maybe_commit_transaction - possibly commit the transaction if its ok to
			
 
				  * @root - the root we're allocating for
			
@@ -4930,18 +4911,29 @@ skip_async:
 
				  * will return -ENOSPC.
			
 
				  */
			
 
				 static int may_commit_transaction(struct btrfs_fs_info *fs_info,
			
 
				-				  struct btrfs_space_info *space_info,
			
 
				-				  u64 bytes, int force)
			
 
				+				  struct btrfs_space_info *space_info)
			
 
				 {
			
 
				+	struct reserve_ticket *ticket = NULL;
			
 
				 	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
			
 
				 	struct btrfs_trans_handle *trans;
			
 
				+	u64 bytes;
			
 
				 
			
 
				 	trans = (struct btrfs_trans_handle *)current->journal_info;
			
 
				 	if (trans)
			
 
				 		return -EAGAIN;
			
 
				 
			
 
				-	if (force)
			
 
				-		goto commit;
			
 
				+	spin_lock(&space_info->lock);
			
 
				+	if (!list_empty(&space_info->priority_tickets))
			
 
				+		ticket = list_first_entry(&space_info->priority_tickets,
			
 
				+					  struct reserve_ticket, list);
			
 
				+	else if (!list_empty(&space_info->tickets))
			
 
				+		ticket = list_first_entry(&space_info->tickets,
			
 
				+					  struct reserve_ticket, list);
			
 
				+	bytes = (ticket) ? ticket->bytes : 0;
			
 
				+	spin_unlock(&space_info->lock);
			
 
				+
			
 
				+	if (!bytes)
			
 
				+		return 0;
			
 
				 
			
 
				 	/* See if there is enough pinned space to make this reservation */
			
 
				 	if (percpu_counter_compare(&space_info->total_bytes_pinned,
			
@@ -4956,8 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 
				 		return -ENOSPC;
			
 
				 
			
 
				 	spin_lock(&delayed_rsv->lock);
			
 
				+	if (delayed_rsv->size > bytes)
			
 
				+		bytes = 0;
			
 
				+	else
			
 
				+		bytes -= delayed_rsv->size;
			
 
				 	if (percpu_counter_compare(&space_info->total_bytes_pinned,
			
 
				-				   bytes - delayed_rsv->size) < 0) {
			
 
				+				   bytes) < 0) {
			
 
				 		spin_unlock(&delayed_rsv->lock);
			
 
				 		return -ENOSPC;
			
 
				 	}
			
@@ -4971,13 +4967,6 @@ commit:
 
				 	return btrfs_commit_transaction(trans);
			
 
				 }
			
 
				 
			
 
				-struct reserve_ticket {
			
 
				-	u64 bytes;
			
 
				-	int error;
			
 
				-	struct list_head list;
			
 
				-	wait_queue_head_t wait;
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * Try to flush some data based on policy set by @state. This is only advisory
			
 
				  * and may fail for various reasons. The caller is supposed to examine the
			
@@ -5027,8 +5016,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 
				 			ret = 0;
			
 
				 		break;
			
 
				 	case COMMIT_TRANS:
			
 
				-		ret = may_commit_transaction(fs_info, space_info,
			
 
				-					     num_bytes, 0);
			
 
				+		ret = may_commit_transaction(fs_info, space_info);
			
 
				 		break;
			
 
				 	default:
			
 
				 		ret = -ENOSPC;
			
@@ -5582,11 +5570,12 @@ again:
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
			
 
				+static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
			
 
				 				    struct btrfs_block_rsv *block_rsv,
			
 
				 				    struct btrfs_block_rsv *dest, u64 num_bytes)
			
 
				 {
			
 
				 	struct btrfs_space_info *space_info = block_rsv->space_info;
			
 
				+	u64 ret;
			
 
				 
			
 
				 	spin_lock(&block_rsv->lock);
			
 
				 	if (num_bytes == (u64)-1)
			
@@ -5601,6 +5590,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 
				 	}
			
 
				 	spin_unlock(&block_rsv->lock);
			
 
				 
			
 
				+	ret = num_bytes;
			
 
				 	if (num_bytes > 0) {
			
 
				 		if (dest) {
			
 
				 			spin_lock(&dest->lock);
			
@@ -5620,6 +5610,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 
				 			space_info_add_old_bytes(fs_info, space_info,
			
 
				 						 num_bytes);
			
 
				 	}
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
			
@@ -5643,6 +5634,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
 
				 	rsv->type = type;
			
 
				 }
			
 
				 
			
 
				+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				+				   struct btrfs_block_rsv *rsv,
			
 
				+				   unsigned short type)
			
 
				+{
			
 
				+	btrfs_init_block_rsv(rsv, type);
			
 
				+	rsv->space_info = __find_space_info(fs_info,
			
 
				+					    BTRFS_BLOCK_GROUP_METADATA);
			
 
				+}
			
 
				+
			
 
				 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
			
 
				 					      unsigned short type)
			
 
				 {
			
@@ -5652,9 +5652,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
 
				 	if (!block_rsv)
			
 
				 		return NULL;
			
 
				 
			
 
				-	btrfs_init_block_rsv(block_rsv, type);
			
 
				-	block_rsv->space_info = __find_space_info(fs_info,
			
 
				-						  BTRFS_BLOCK_GROUP_METADATA);
			
 
				+	btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
			
 
				 	return block_rsv;
			
 
				 }
			
 
				 
			
@@ -5737,6 +5735,66 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * btrfs_inode_rsv_refill - refill the inode block rsv.
			
 
				+ * @inode - the inode we are refilling.
			
 
				+ * @flush - the flushing restriction.
			
 
				+ *
			
 
				+ * Essentially the same as btrfs_block_rsv_refill, except it uses the
			
 
				+ * block_rsv->size as the minimum size.  We'll either refill the missing amount
			
 
				+ * or return if we already have enough space.  This will also handle the reserve
			
 
				+ * tracepoint for the reserved amount.
			
 
				+ */
			
 
				+int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
			
 
				+			   enum btrfs_reserve_flush_enum flush)
			
 
				+{
			
 
				+	struct btrfs_root *root = inode->root;
			
 
				+	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
			
 
				+	u64 num_bytes = 0;
			
 
				+	int ret = -ENOSPC;
			
 
				+
			
 
				+	spin_lock(&block_rsv->lock);
			
 
				+	if (block_rsv->reserved < block_rsv->size)
			
 
				+		num_bytes = block_rsv->size - block_rsv->reserved;
			
 
				+	spin_unlock(&block_rsv->lock);
			
 
				+
			
 
				+	if (num_bytes == 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
			
 
				+	if (!ret) {
			
 
				+		block_rsv_add_bytes(block_rsv, num_bytes, 0);
			
 
				+		trace_btrfs_space_reservation(root->fs_info, "delalloc",
			
 
				+					      btrfs_ino(inode), num_bytes, 1);
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * btrfs_inode_rsv_release - release any excessive reservation.
			
 
				+ * @inode - the inode we need to release from.
			
 
				+ *
			
 
				+ * This is the same as btrfs_block_rsv_release, except that it handles the
			
 
				+ * tracepoint for the reservation.
			
 
				+ */
			
 
				+void btrfs_inode_rsv_release(struct btrfs_inode *inode)
			
 
				+{
			
 
				+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
			
 
				+	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
			
 
				+	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
			
 
				+	u64 released = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Since we statically set the block_rsv->size we just want to say we
			
 
				+	 * are releasing 0 bytes, and then we'll just get the reservation over
			
 
				+	 * the size freed.
			
 
				+	 */
			
 
				+	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
			
 
				+	if (released > 0)
			
 
				+		trace_btrfs_space_reservation(fs_info, "delalloc",
			
 
				+					      btrfs_ino(inode), released, 0);
			
 
				+}
			
 
				+
			
 
				 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
			
 
				 			     struct btrfs_block_rsv *block_rsv,
			
 
				 			     u64 num_bytes)
			
@@ -5808,7 +5866,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 
				 
			
 
				 	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
			
 
				 	fs_info->global_block_rsv.space_info = space_info;
			
 
				-	fs_info->delalloc_block_rsv.space_info = space_info;
			
 
				 	fs_info->trans_block_rsv.space_info = space_info;
			
 
				 	fs_info->empty_block_rsv.space_info = space_info;
			
 
				 	fs_info->delayed_block_rsv.space_info = space_info;
			
@@ -5828,8 +5885,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 
				 {
			
 
				 	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
			
 
				 				(u64)-1);
			
 
				-	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
			
 
				-	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
			
 
				 	WARN_ON(fs_info->trans_block_rsv.size > 0);
			
 
				 	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
			
 
				 	WARN_ON(fs_info->chunk_block_rsv.size > 0);
			
@@ -5841,12 +5896,15 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 
				 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
			
 
				 				  struct btrfs_fs_info *fs_info)
			
 
				 {
			
 
				-	if (!trans->block_rsv)
			
 
				+	if (!trans->block_rsv) {
			
 
				+		ASSERT(!trans->bytes_reserved);
			
 
				 		return;
			
 
				+	}
			
 
				 
			
 
				 	if (!trans->bytes_reserved)
			
 
				 		return;
			
 
				 
			
 
				+	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
			
 
				 	trace_btrfs_space_reservation(fs_info, "transaction",
			
 
				 				      trans->transid, trans->bytes_reserved, 0);
			
 
				 	btrfs_block_rsv_release(fs_info, trans->block_rsv,
			
@@ -5968,104 +6026,37 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 
				 	btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * drop_outstanding_extent - drop an outstanding extent
			
 
				- * @inode: the inode we're dropping the extent for
			
 
				- * @num_bytes: the number of bytes we're releasing.
			
 
				- *
			
 
				- * This is called when we are freeing up an outstanding extent, either called
			
 
				- * after an error or after an extent is written.  This will return the number of
			
 
				- * reserved extents that need to be freed.  This must be called with
			
 
				- * BTRFS_I(inode)->lock held.
			
 
				- */
			
 
				-static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
			
 
				-		u64 num_bytes)
			
 
				-{
			
 
				-	unsigned drop_inode_space = 0;
			
 
				-	unsigned dropped_extents = 0;
			
 
				-	unsigned num_extents;
			
 
				-
			
 
				-	num_extents = count_max_extents(num_bytes);
			
 
				-	ASSERT(num_extents);
			
 
				-	ASSERT(inode->outstanding_extents >= num_extents);
			
 
				-	inode->outstanding_extents -= num_extents;
			
 
				-
			
 
				-	if (inode->outstanding_extents == 0 &&
			
 
				-	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			
 
				-			       &inode->runtime_flags))
			
 
				-		drop_inode_space = 1;
			
 
				-
			
 
				-	/*
			
 
				-	 * If we have more or the same amount of outstanding extents than we have
			
 
				-	 * reserved then we need to leave the reserved extents count alone.
			
 
				-	 */
			
 
				-	if (inode->outstanding_extents >= inode->reserved_extents)
			
 
				-		return drop_inode_space;
			
 
				-
			
 
				-	dropped_extents = inode->reserved_extents - inode->outstanding_extents;
			
 
				-	inode->reserved_extents -= dropped_extents;
			
 
				-	return dropped_extents + drop_inode_space;
			
 
				-}
			
 
				-
			
 
				-/**
			
 
				- * calc_csum_metadata_size - return the amount of metadata space that must be
			
 
				- *	reserved/freed for the given bytes.
			
 
				- * @inode: the inode we're manipulating
			
 
				- * @num_bytes: the number of bytes in question
			
 
				- * @reserve: 1 if we are reserving space, 0 if we are freeing space
			
 
				- *
			
 
				- * This adjusts the number of csum_bytes in the inode and then returns the
			
 
				- * correct amount of metadata that must either be reserved or freed.  We
			
 
				- * calculate how many checksums we can fit into one leaf and then divide the
			
 
				- * number of bytes that will need to be checksumed by this value to figure out
			
 
				- * how many checksums will be required.  If we are adding bytes then the number
			
 
				- * may go up and we will return the number of additional bytes that must be
			
 
				- * reserved.  If it is going down we will return the number of bytes that must
			
 
				- * be freed.
			
 
				- *
			
 
				- * This must be called with BTRFS_I(inode)->lock held.
			
 
				- */
			
 
				-static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes,
			
 
				-				   int reserve)
			
 
				+static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
			
 
				+						 struct btrfs_inode *inode)
			
 
				 {
			
 
				-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
			
 
				-	u64 old_csums, num_csums;
			
 
				-
			
 
				-	if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-	old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
			
 
				-	if (reserve)
			
 
				-		inode->csum_bytes += num_bytes;
			
 
				-	else
			
 
				-		inode->csum_bytes -= num_bytes;
			
 
				-	num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
			
 
				-
			
 
				-	/* No change, no need to reserve more */
			
 
				-	if (old_csums == num_csums)
			
 
				-		return 0;
			
 
				+	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
			
 
				+	u64 reserve_size = 0;
			
 
				+	u64 csum_leaves;
			
 
				+	unsigned outstanding_extents;
			
 
				 
			
 
				-	if (reserve)
			
 
				-		return btrfs_calc_trans_metadata_size(fs_info,
			
 
				-						      num_csums - old_csums);
			
 
				+	lockdep_assert_held(&inode->lock);
			
 
				+	outstanding_extents = inode->outstanding_extents;
			
 
				+	if (outstanding_extents)
			
 
				+		reserve_size = btrfs_calc_trans_metadata_size(fs_info,
			
 
				+						outstanding_extents + 1);
			
 
				+	csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
			
 
				+						 inode->csum_bytes);
			
 
				+	reserve_size += btrfs_calc_trans_metadata_size(fs_info,
			
 
				+						       csum_leaves);
			
 
				 
			
 
				-	return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums);
			
 
				+	spin_lock(&block_rsv->lock);
			
 
				+	block_rsv->size = reserve_size;
			
 
				+	spin_unlock(&block_rsv->lock);
			
 
				 }
			
 
				 
			
 
				 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
			
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
			
 
				 	struct btrfs_root *root = inode->root;
			
 
				-	struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
			
 
				-	u64 to_reserve = 0;
			
 
				-	u64 csum_bytes;
			
 
				 	unsigned nr_extents;
			
 
				 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
			
 
				 	int ret = 0;
			
 
				 	bool delalloc_lock = true;
			
 
				-	u64 to_free = 0;
			
 
				-	unsigned dropped;
			
 
				-	bool release_extra = false;
			
 
				 
			
 
				 	/* If we are a free space inode we need to not flush since we will be in
			
 
				 	 * the middle of a transaction commit.  We also don't need the delalloc
			
@@ -6091,19 +6082,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
				 
			
 
				 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
			
 
				 
			
 
				+	/* Add our new extents and calculate the new rsv size. */
			
 
				 	spin_lock(&inode->lock);
			
 
				 	nr_extents = count_max_extents(num_bytes);
			
 
				-	inode->outstanding_extents += nr_extents;
			
 
				-
			
 
				-	nr_extents = 0;
			
 
				-	if (inode->outstanding_extents > inode->reserved_extents)
			
 
				-		nr_extents += inode->outstanding_extents -
			
 
				-			inode->reserved_extents;
			
 
				-
			
 
				-	/* We always want to reserve a slot for updating the inode. */
			
 
				-	to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
			
 
				-	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
			
 
				-	csum_bytes = inode->csum_bytes;
			
 
				+	btrfs_mod_outstanding_extents(inode, nr_extents);
			
 
				+	inode->csum_bytes += num_bytes;
			
 
				+	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
			
 
				 	spin_unlock(&inode->lock);
			
 
				 
			
 
				 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			
@@ -6113,92 +6097,26 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
				 			goto out_fail;
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
			
 
				+	ret = btrfs_inode_rsv_refill(inode, flush);
			
 
				 	if (unlikely(ret)) {
			
 
				 		btrfs_qgroup_free_meta(root,
			
 
				 				       nr_extents * fs_info->nodesize);
			
 
				 		goto out_fail;
			
 
				 	}
			
 
				 
			
 
				-	spin_lock(&inode->lock);
			
 
				-	if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			
 
				-			     &inode->runtime_flags)) {
			
 
				-		to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
			
 
				-		release_extra = true;
			
 
				-	}
			
 
				-	inode->reserved_extents += nr_extents;
			
 
				-	spin_unlock(&inode->lock);
			
 
				-
			
 
				 	if (delalloc_lock)
			
 
				 		mutex_unlock(&inode->delalloc_mutex);
			
 
				-
			
 
				-	if (to_reserve)
			
 
				-		trace_btrfs_space_reservation(fs_info, "delalloc",
			
 
				-					      btrfs_ino(inode), to_reserve, 1);
			
 
				-	if (release_extra)
			
 
				-		btrfs_block_rsv_release(fs_info, block_rsv,
			
 
				-				btrfs_calc_trans_metadata_size(fs_info, 1));
			
 
				 	return 0;
			
 
				 
			
 
				 out_fail:
			
 
				 	spin_lock(&inode->lock);
			
 
				-	dropped = drop_outstanding_extent(inode, num_bytes);
			
 
				-	/*
			
 
				-	 * If the inodes csum_bytes is the same as the original
			
 
				-	 * csum_bytes then we know we haven't raced with any free()ers
			
 
				-	 * so we can just reduce our inodes csum bytes and carry on.
			
 
				-	 */
			
 
				-	if (inode->csum_bytes == csum_bytes) {
			
 
				-		calc_csum_metadata_size(inode, num_bytes, 0);
			
 
				-	} else {
			
 
				-		u64 orig_csum_bytes = inode->csum_bytes;
			
 
				-		u64 bytes;
			
 
				-
			
 
				-		/*
			
 
				-		 * This is tricky, but first we need to figure out how much we
			
 
				-		 * freed from any free-ers that occurred during this
			
 
				-		 * reservation, so we reset ->csum_bytes to the csum_bytes
			
 
				-		 * before we dropped our lock, and then call the free for the
			
 
				-		 * number of bytes that were freed while we were trying our
			
 
				-		 * reservation.
			
 
				-		 */
			
 
				-		bytes = csum_bytes - inode->csum_bytes;
			
 
				-		inode->csum_bytes = csum_bytes;
			
 
				-		to_free = calc_csum_metadata_size(inode, bytes, 0);
			
 
				-
			
 
				-
			
 
				-		/*
			
 
				-		 * Now we need to see how much we would have freed had we not
			
 
				-		 * been making this reservation and our ->csum_bytes were not
			
 
				-		 * artificially inflated.
			
 
				-		 */
			
 
				-		inode->csum_bytes = csum_bytes - num_bytes;
			
 
				-		bytes = csum_bytes - orig_csum_bytes;
			
 
				-		bytes = calc_csum_metadata_size(inode, bytes, 0);
			
 
				-
			
 
				-		/*
			
 
				-		 * Now reset ->csum_bytes to what it should be.  If bytes is
			
 
				-		 * more than to_free then we would have freed more space had we
			
 
				-		 * not had an artificially high ->csum_bytes, so we need to free
			
 
				-		 * the remainder.  If bytes is the same or less then we don't
			
 
				-		 * need to do anything, the other free-ers did the correct
			
 
				-		 * thing.
			
 
				-		 */
			
 
				-		inode->csum_bytes = orig_csum_bytes - num_bytes;
			
 
				-		if (bytes > to_free)
			
 
				-			to_free = bytes - to_free;
			
 
				-		else
			
 
				-			to_free = 0;
			
 
				-	}
			
 
				+	nr_extents = count_max_extents(num_bytes);
			
 
				+	btrfs_mod_outstanding_extents(inode, -nr_extents);
			
 
				+	inode->csum_bytes -= num_bytes;
			
 
				+	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
			
 
				 	spin_unlock(&inode->lock);
			
 
				-	if (dropped)
			
 
				-		to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
			
 
				 
			
 
				-	if (to_free) {
			
 
				-		btrfs_block_rsv_release(fs_info, block_rsv, to_free);
			
 
				-		trace_btrfs_space_reservation(fs_info, "delalloc",
			
 
				-					      btrfs_ino(inode), to_free, 0);
			
 
				-	}
			
 
				+	btrfs_inode_rsv_release(inode);
			
 
				 	if (delalloc_lock)
			
 
				 		mutex_unlock(&inode->delalloc_mutex);
			
 
				 	return ret;
			
@@ -6206,36 +6124,55 @@ out_fail:
 
				 
			
 
				 /**
			
 
				  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
			
 
				- * @inode: the inode to release the reservation for
			
 
				- * @num_bytes: the number of bytes we're releasing
			
 
				+ * @inode: the inode to release the reservation for.
			
 
				+ * @num_bytes: the number of bytes we are releasing.
			
 
				  *
			
 
				  * This will release the metadata reservation for an inode.  This can be called
			
 
				  * once we complete IO for a given set of bytes to release their metadata
			
 
				- * reservations.
			
 
				+ * reservations, or on error for the same reason.
			
 
				  */
			
 
				 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
			
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
			
 
				-	u64 to_free = 0;
			
 
				-	unsigned dropped;
			
 
				 
			
 
				 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
			
 
				 	spin_lock(&inode->lock);
			
 
				-	dropped = drop_outstanding_extent(inode, num_bytes);
			
 
				-
			
 
				-	if (num_bytes)
			
 
				-		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
			
 
				+	inode->csum_bytes -= num_bytes;
			
 
				+	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
			
 
				 	spin_unlock(&inode->lock);
			
 
				-	if (dropped > 0)
			
 
				-		to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
			
 
				 
			
 
				 	if (btrfs_is_testing(fs_info))
			
 
				 		return;
			
 
				 
			
 
				-	trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
			
 
				-				      to_free, 0);
			
 
				+	btrfs_inode_rsv_release(inode);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * btrfs_delalloc_release_extents - release our outstanding_extents
			
 
				+ * @inode: the inode to balance the reservation for.
			
 
				+ * @num_bytes: the number of bytes we originally reserved with
			
 
				+ *
			
 
				+ * When we reserve space we increase outstanding_extents for the extents we may
			
 
				+ * add.  Once we've set the range as delalloc or created our ordered extents we
			
 
				+ * have outstanding_extents to track the real usage, so we use this to free our
			
 
				+ * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
			
 
				+ * with btrfs_delalloc_reserve_metadata.
			
 
				+ */
			
 
				+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
			
 
				+{
			
 
				+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
			
 
				+	unsigned num_extents;
			
 
				+
			
 
				+	spin_lock(&inode->lock);
			
 
				+	num_extents = count_max_extents(num_bytes);
			
 
				+	btrfs_mod_outstanding_extents(inode, -num_extents);
			
 
				+	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
			
 
				+	spin_unlock(&inode->lock);
			
 
				+
			
 
				+	if (btrfs_is_testing(fs_info))
			
 
				+		return;
			
 
				 
			
 
				-	btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
			
 
				+	btrfs_inode_rsv_release(inode);
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -6282,10 +6219,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 
				  * @inode: inode we're releasing space for
			
 
				  * @start: start position of the space already reserved
			
 
				  * @len: the len of the space already reserved
			
 
				- *
			
 
				- * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
			
 
				- * called in the case that we don't need the metadata AND data reservations
			
 
				- * anymore.  So if there is an error or we insert an inline extent.
			
 
				+ * @release_bytes: the len of the space we consumed or didn't use
			
 
				  *
			
 
				  * This function will release the metadata space that was not used and will
			
 
				  * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
			
@@ -6293,7 +6227,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 
				  * Also it will handle the qgroup reserved space.
			
 
				  */
			
 
				 void btrfs_delalloc_release_space(struct inode *inode,
			
 
				-			struct extent_changeset *reserved, u64 start, u64 len)
			
 
				+				  struct extent_changeset *reserved,
			
 
				+				  u64 start, u64 len)
			
 
				 {
			
 
				 	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
			
 
				 	btrfs_free_reserved_data_space(inode, reserved, start, len);
			
@@ -6958,7 +6893,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
				 	BUG_ON(!is_data && refs_to_drop != 1);
			
 
				 
			
 
				 	if (is_data)
			
 
				-		skinny_metadata = 0;
			
 
				+		skinny_metadata = false;
			
 
				 
			
 
				 	ret = lookup_extent_backref(trans, info, path, &iref,
			
 
				 				    bytenr, num_bytes, parent,
			
@@ -7213,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 
				 		goto out_delayed_unlock;
			
 
				 
			
 
				 	spin_lock(&head->lock);
			
 
				-	if (!list_empty(&head->ref_list))
			
 
				+	if (!RB_EMPTY_ROOT(&head->ref_tree))
			
 
				 		goto out;
			
 
				 
			
 
				 	if (head->extent_op) {
			
@@ -7234,9 +7169,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 
				 	 * at this point we have a head with no other entries.  Go
			
 
				 	 * ahead and process it.
			
 
				 	 */
			
 
				-	head->node.in_tree = 0;
			
 
				 	rb_erase(&head->href_node, &delayed_refs->href_root);
			
 
				-
			
 
				+	RB_CLEAR_NODE(&head->href_node);
			
 
				 	atomic_dec(&delayed_refs->num_entries);
			
 
				 
			
 
				 	/*
			
@@ -7255,7 +7189,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 
				 		ret = 1;
			
 
				 
			
 
				 	mutex_unlock(&head->mutex);
			
 
				-	btrfs_put_delayed_ref(&head->node);
			
 
				+	btrfs_put_delayed_ref_head(head);
			
 
				 	return ret;
			
 
				 out:
			
 
				 	spin_unlock(&head->lock);
			
@@ -7277,6 +7211,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
				 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			
 
				 		int old_ref_mod, new_ref_mod;
			
 
				 
			
 
				+		btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
			
 
				+				   root->root_key.objectid,
			
 
				+				   btrfs_header_level(buf), 0,
			
 
				+				   BTRFS_DROP_DELAYED_REF);
			
 
				 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
			
 
				 						 buf->len, parent,
			
 
				 						 root->root_key.objectid,
			
@@ -7329,16 +7267,21 @@ out:
 
				 
			
 
				 /* Can return -ENOMEM */
			
 
				 int btrfs_free_extent(struct btrfs_trans_handle *trans,
			
 
				-		      struct btrfs_fs_info *fs_info,
			
 
				+		      struct btrfs_root *root,
			
 
				 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
			
 
				 		      u64 owner, u64 offset)
			
 
				 {
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				 	int old_ref_mod, new_ref_mod;
			
 
				 	int ret;
			
 
				 
			
 
				 	if (btrfs_is_testing(fs_info))
			
 
				 		return 0;
			
 
				 
			
 
				+	if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
			
 
				+		btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
			
 
				+				   root_objectid, owner, offset,
			
 
				+				   BTRFS_DROP_DELAYED_REF);
			
 
				 
			
 
				 	/*
			
 
				 	 * tree log blocks never actually go into the extent allocation
			
@@ -8306,17 +8249,22 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 
				 }
			
 
				 
			
 
				 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
			
 
				-				     u64 root_objectid, u64 owner,
			
 
				+				     struct btrfs_root *root, u64 owner,
			
 
				 				     u64 offset, u64 ram_bytes,
			
 
				 				     struct btrfs_key *ins)
			
 
				 {
			
 
				-	struct btrfs_fs_info *fs_info = trans->fs_info;
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				 	int ret;
			
 
				 
			
 
				-	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
			
 
				+	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
			
 
				+
			
 
				+	btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
			
 
				+			   root->root_key.objectid, owner, offset,
			
 
				+			   BTRFS_ADD_DELAYED_EXTENT);
			
 
				 
			
 
				 	ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
			
 
				-					 ins->offset, 0, root_objectid, owner,
			
 
				+					 ins->offset, 0,
			
 
				+					 root->root_key.objectid, owner,
			
 
				 					 offset, ram_bytes,
			
 
				 					 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
			
 
				 	return ret;
			
@@ -8538,6 +8486,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
				 		extent_op->is_data = false;
			
 
				 		extent_op->level = level;
			
 
				 
			
 
				+		btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
			
 
				+				   root_objectid, level, 0,
			
 
				+				   BTRFS_ADD_DELAYED_EXTENT);
			
 
				 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
			
 
				 						 ins.offset, parent,
			
 
				 						 root_objectid, level,
			
@@ -8894,7 +8845,7 @@ skip:
 
				 					     ret);
			
 
				 			}
			
 
				 		}
			
 
				-		ret = btrfs_free_extent(trans, fs_info, bytenr, blocksize,
			
 
				+		ret = btrfs_free_extent(trans, root, bytenr, blocksize,
			
 
				 					parent, root->root_key.objectid,
			
 
				 					level - 1, 0);
			
 
				 		if (ret)
			
@@ -9311,7 +9262,7 @@ out:
 
				 	 * don't have it in the radix (like when we recover after a power fail
			
 
				 	 * or unmount) so we don't leak memory.
			
 
				 	 */
			
 
				-	if (!for_reloc && root_dropped == false)
			
 
				+	if (!for_reloc && !root_dropped)
			
 
				 		btrfs_add_dead_root(root);
			
 
				 	if (err && err != -EAGAIN)
			
 
				 		btrfs_handle_fs_error(fs_info, err, NULL);
			
@@ -9968,9 +9919,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void __link_block_group(struct btrfs_space_info *space_info,
			
 
				-			       struct btrfs_block_group_cache *cache)
			
 
				+static void link_block_group(struct btrfs_block_group_cache *cache)
			
 
				 {
			
 
				+	struct btrfs_space_info *space_info = cache->space_info;
			
 
				 	int index = get_block_group_index(cache);
			
 
				 	bool first = false;
			
 
				 
			
@@ -10178,7 +10129,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 
				 
			
 
				 		cache->space_info = space_info;
			
 
				 
			
 
				-		__link_block_group(space_info, cache);
			
 
				+		link_block_group(cache);
			
 
				 
			
 
				 		set_avail_alloc_bits(info, cache->flags);
			
 
				 		if (btrfs_chunk_readonly(info, cache->key.objectid)) {
			
@@ -10337,7 +10288,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
				 				cache->bytes_super, &cache->space_info);
			
 
				 	update_global_block_rsv(fs_info);
			
 
				 
			
 
				-	__link_block_group(cache->space_info, cache);
			
 
				+	link_block_group(cache);
			
 
				 
			
 
				 	list_add_tail(&cache->bg_list, &trans->new_bgs);
			
 
				 
			
@@ -10387,6 +10338,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
				 	 * remove it.
			
 
				 	 */
			
 
				 	free_excluded_extents(fs_info, block_group);
			
 
				+	btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
			
 
				+				  block_group->key.offset);
			
 
				 
			
 
				 	memcpy(&key, &block_group->key, sizeof(key));
			
 
				 	index = get_block_group_index(block_group);
			
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -110,7 +110,6 @@ struct extent_page_data {
 
				 	struct bio *bio;
			
 
				 	struct extent_io_tree *tree;
			
 
				 	get_extent_t *get_extent;
			
 
				-	unsigned long bio_flags;
			
 
				 
			
 
				 	/* tells writepage not to lock the state bits for this range
			
 
				 	 * it still does the unlocking
			
@@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
 
				  */
			
 
				 static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
			
 
				 			      struct writeback_control *wbc,
			
 
				-			      struct page *page, sector_t sector,
			
 
				-			      size_t size, unsigned long offset,
			
 
				+			      struct page *page, u64 offset,
			
 
				+			      size_t size, unsigned long pg_offset,
			
 
				 			      struct block_device *bdev,
			
 
				 			      struct bio **bio_ret,
			
 
				 			      bio_end_io_t end_io_func,
			
@@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 
				 	int contig = 0;
			
 
				 	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
			
 
				 	size_t page_size = min_t(size_t, size, PAGE_SIZE);
			
 
				+	sector_t sector = offset >> 9;
			
 
				 
			
 
				 	if (bio_ret && *bio_ret) {
			
 
				 		bio = *bio_ret;
			
@@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 
				 
			
 
				 		if (prev_bio_flags != bio_flags || !contig ||
			
 
				 		    force_bio_submit ||
			
 
				-		    merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
			
 
				-		    bio_add_page(bio, page, page_size, offset) < page_size) {
			
 
				+		    merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
			
 
				+		    bio_add_page(bio, page, page_size, pg_offset) < page_size) {
			
 
				 			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
			
 
				 			if (ret < 0) {
			
 
				 				*bio_ret = NULL;
			
@@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
			
 
				-	bio_add_page(bio, page, page_size, offset);
			
 
				+	bio = btrfs_bio_alloc(bdev, offset);
			
 
				+	bio_add_page(bio, page, page_size, pg_offset);
			
 
				 	bio->bi_end_io = end_io_func;
			
 
				 	bio->bi_private = tree;
			
 
				 	bio->bi_write_hint = page->mapping->host->i_write_hint;
			
@@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree,
 
				 	u64 last_byte = i_size_read(inode);
			
 
				 	u64 block_start;
			
 
				 	u64 cur_end;
			
 
				-	sector_t sector;
			
 
				 	struct extent_map *em;
			
 
				 	struct block_device *bdev;
			
 
				 	int ret = 0;
			
@@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 
				 	}
			
 
				 	while (cur <= end) {
			
 
				 		bool force_bio_submit = false;
			
 
				+		u64 offset;
			
 
				 
			
 
				 		if (cur >= last_byte) {
			
 
				 			char *userpage;
			
@@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree,
 
				 		iosize = ALIGN(iosize, blocksize);
			
 
				 		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
			
 
				 			disk_io_size = em->block_len;
			
 
				-			sector = em->block_start >> 9;
			
 
				+			offset = em->block_start;
			
 
				 		} else {
			
 
				-			sector = (em->block_start + extent_offset) >> 9;
			
 
				+			offset = em->block_start + extent_offset;
			
 
				 			disk_io_size = iosize;
			
 
				 		}
			
 
				 		bdev = em->bdev;
			
@@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree,
 
				 		}
			
 
				 
			
 
				 		ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
			
 
				-					 page, sector, disk_io_size, pg_offset,
			
 
				-					 bdev, bio,
			
 
				+					 page, offset, disk_io_size,
			
 
				+					 pg_offset, bdev, bio,
			
 
				 					 end_bio_extent_readpage, mirror_num,
			
 
				 					 *bio_flags,
			
 
				 					 this_bio_flag,
			
@@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
				 	u64 extent_offset;
			
 
				 	u64 block_start;
			
 
				 	u64 iosize;
			
 
				-	sector_t sector;
			
 
				 	struct extent_map *em;
			
 
				 	struct block_device *bdev;
			
 
				 	size_t pg_offset = 0;
			
@@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
				 
			
 
				 	while (cur <= end) {
			
 
				 		u64 em_end;
			
 
				+		u64 offset;
			
 
				 
			
 
				 		if (cur >= i_size) {
			
 
				 			if (tree->ops && tree->ops->writepage_end_io_hook)
			
@@ -3389,7 +3389,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
				 		BUG_ON(end < cur);
			
 
				 		iosize = min(em_end - cur, end - cur + 1);
			
 
				 		iosize = ALIGN(iosize, blocksize);
			
 
				-		sector = (em->block_start + extent_offset) >> 9;
			
 
				+		offset = em->block_start + extent_offset;
			
 
				 		bdev = em->bdev;
			
 
				 		block_start = em->block_start;
			
 
				 		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
			
@@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
				 		}
			
 
				 
			
 
				 		ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
			
 
				-					 page, sector, iosize, pg_offset,
			
 
				+					 page, offset, iosize, pg_offset,
			
 
				 					 bdev, &epd->bio,
			
 
				 					 end_bio_extent_writepage,
			
 
				 					 0, 0, 0, false);
			
@@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 
				 	u64 offset = eb->start;
			
 
				 	u32 nritems;
			
 
				 	unsigned long i, num_pages;
			
 
				-	unsigned long bio_flags = 0;
			
 
				 	unsigned long start, end;
			
 
				 	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
			
 
				 	int ret = 0;
			
@@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 
				 	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
			
 
				 	num_pages = num_extent_pages(eb->start, eb->len);
			
 
				 	atomic_set(&eb->io_pages, num_pages);
			
 
				-	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
			
 
				-		bio_flags = EXTENT_BIO_TREE_LOG;
			
 
				 
			
 
				 	/* set btree blocks beyond nritems with 0 to avoid stale content. */
			
 
				 	nritems = btrfs_header_nritems(eb);
			
@@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 
				 		clear_page_dirty_for_io(p);
			
 
				 		set_page_writeback(p);
			
 
				 		ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
			
 
				-					 p, offset >> 9, PAGE_SIZE, 0, bdev,
			
 
				+					 p, offset, PAGE_SIZE, 0, bdev,
			
 
				 					 &epd->bio,
			
 
				 					 end_bio_extent_buffer_writepage,
			
 
				-					 0, epd->bio_flags, bio_flags, false);
			
 
				-		epd->bio_flags = bio_flags;
			
 
				+					 0, 0, 0, false);
			
 
				 		if (ret) {
			
 
				 			set_btree_ioerr(p);
			
 
				 			if (PageWriteback(p))
			
@@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping,
 
				 		.tree = tree,
			
 
				 		.extent_locked = 0,
			
 
				 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
			
 
				-		.bio_flags = 0,
			
 
				 	};
			
 
				 	int ret = 0;
			
 
				 	int done = 0;
			
@@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 
				 	if (epd->bio) {
			
 
				 		int ret;
			
 
				 
			
 
				-		ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
			
 
				+		ret = submit_one_bio(epd->bio, 0, 0);
			
 
				 		BUG_ON(ret < 0); /* -ENOMEM */
			
 
				 		epd->bio = NULL;
			
 
				 	}
			
@@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 
				 		.get_extent = get_extent,
			
 
				 		.extent_locked = 0,
			
 
				 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
			
 
				-		.bio_flags = 0,
			
 
				 	};
			
 
				 
			
 
				 	ret = __extent_writepage(page, wbc, &epd);
			
@@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 
				 		.get_extent = get_extent,
			
 
				 		.extent_locked = 1,
			
 
				 		.sync_io = mode == WB_SYNC_ALL,
			
 
				-		.bio_flags = 0,
			
 
				 	};
			
 
				 	struct writeback_control wbc_writepages = {
			
 
				 		.sync_mode	= mode,
			
@@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree,
 
				 		.get_extent = get_extent,
			
 
				 		.extent_locked = 0,
			
 
				 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
			
 
				-		.bio_flags = 0,
			
 
				 	};
			
 
				 
			
 
				 	ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
			
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -34,7 +34,6 @@
 
				  * type for this bio
			
 
				  */
			
 
				 #define EXTENT_BIO_COMPRESSED 1
			
 
				-#define EXTENT_BIO_TREE_LOG 2
			
 
				 #define EXTENT_BIO_FLAG_SHIFT 16
			
 
				 
			
 
				 /* these are bit numbers for test/set bit */
			
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -856,7 +856,7 @@ next_slot:
 
				 			btrfs_mark_buffer_dirty(leaf);
			
 
				 
			
 
				 			if (update_refs && disk_bytenr > 0) {
			
 
				-				ret = btrfs_inc_extent_ref(trans, fs_info,
			
 
				+				ret = btrfs_inc_extent_ref(trans, root,
			
 
				 						disk_bytenr, num_bytes, 0,
			
 
				 						root->root_key.objectid,
			
 
				 						new_key.objectid,
			
@@ -940,7 +940,7 @@ delete_extent_item:
 
				 				extent_end = ALIGN(extent_end,
			
 
				 						   fs_info->sectorsize);
			
 
				 			} else if (update_refs && disk_bytenr > 0) {
			
 
				-				ret = btrfs_free_extent(trans, fs_info,
			
 
				+				ret = btrfs_free_extent(trans, root,
			
 
				 						disk_bytenr, num_bytes, 0,
			
 
				 						root->root_key.objectid,
			
 
				 						key.objectid, key.offset -
			
@@ -1234,7 +1234,7 @@ again:
 
				 						extent_end - split);
			
 
				 		btrfs_mark_buffer_dirty(leaf);
			
 
				 
			
 
				-		ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
			
 
				+		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
			
 
				 					   0, root->root_key.objectid,
			
 
				 					   ino, orig_offset);
			
 
				 		if (ret) {
			
@@ -1268,7 +1268,7 @@ again:
 
				 		extent_end = other_end;
			
 
				 		del_slot = path->slots[0] + 1;
			
 
				 		del_nr++;
			
 
				-		ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
			
 
				+		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
			
 
				 					0, root->root_key.objectid,
			
 
				 					ino, orig_offset);
			
 
				 		if (ret) {
			
@@ -1288,7 +1288,7 @@ again:
 
				 		key.offset = other_start;
			
 
				 		del_slot = path->slots[0];
			
 
				 		del_nr++;
			
 
				-		ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
			
 
				+		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
			
 
				 					0, root->root_key.objectid,
			
 
				 					ino, orig_offset);
			
 
				 		if (ret) {
			
@@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
				 	int ret = 0;
			
 
				 	bool only_release_metadata = false;
			
 
				 	bool force_page_uptodate = false;
			
 
				-	bool need_unlock;
			
 
				 
			
 
				 	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			
 
				 			PAGE_SIZE / (sizeof(struct page *)));
			
@@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
				 		size_t copied;
			
 
				 		size_t dirty_sectors;
			
 
				 		size_t num_sectors;
			
 
				+		int extents_locked;
			
 
				 
			
 
				 		WARN_ON(num_pages > nrptrs);
			
 
				 
			
@@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
				 			}
			
 
				 		}
			
 
				 
			
 
				+		WARN_ON(reserve_bytes == 0);
			
 
				 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
			
 
				 				reserve_bytes);
			
 
				 		if (ret) {
			
@@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
				 		}
			
 
				 
			
 
				 		release_bytes = reserve_bytes;
			
 
				-		need_unlock = false;
			
 
				 again:
			
 
				 		/*
			
 
				 		 * This is going to setup the pages array with the number of
			
@@ -1679,19 +1679,23 @@ again:
 
				 		ret = prepare_pages(inode, pages, num_pages,
			
 
				 				    pos, write_bytes,
			
 
				 				    force_page_uptodate);
			
 
				-		if (ret)
			
 
				+		if (ret) {
			
 
				+			btrfs_delalloc_release_extents(BTRFS_I(inode),
			
 
				+						       reserve_bytes);
			
 
				 			break;
			
 
				+		}
			
 
				 
			
 
				-		ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
			
 
				+		extents_locked = lock_and_cleanup_extent_if_need(
			
 
				+				BTRFS_I(inode), pages,
			
 
				 				num_pages, pos, write_bytes, &lockstart,
			
 
				 				&lockend, &cached_state);
			
 
				-		if (ret < 0) {
			
 
				-			if (ret == -EAGAIN)
			
 
				+		if (extents_locked < 0) {
			
 
				+			if (extents_locked == -EAGAIN)
			
 
				 				goto again;
			
 
				+			btrfs_delalloc_release_extents(BTRFS_I(inode),
			
 
				+						       reserve_bytes);
			
 
				+			ret = extents_locked;
			
 
				 			break;
			
 
				-		} else if (ret > 0) {
			
 
				-			need_unlock = true;
			
 
				-			ret = 0;
			
 
				 		}
			
 
				 
			
 
				 		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
			
@@ -1718,23 +1722,10 @@ again:
 
				 						   PAGE_SIZE);
			
 
				 		}
			
 
				 
			
 
				-		/*
			
 
				-		 * If we had a short copy we need to release the excess delaloc
			
 
				-		 * bytes we reserved.  We need to increment outstanding_extents
			
 
				-		 * because btrfs_delalloc_release_space and
			
 
				-		 * btrfs_delalloc_release_metadata will decrement it, but
			
 
				-		 * we still have an outstanding extent for the chunk we actually
			
 
				-		 * managed to copy.
			
 
				-		 */
			
 
				 		if (num_sectors > dirty_sectors) {
			
 
				 			/* release everything except the sectors we dirtied */
			
 
				 			release_bytes -= dirty_sectors <<
			
 
				 						fs_info->sb->s_blocksize_bits;
			
 
				-			if (copied > 0) {
			
 
				-				spin_lock(&BTRFS_I(inode)->lock);
			
 
				-				BTRFS_I(inode)->outstanding_extents++;
			
 
				-				spin_unlock(&BTRFS_I(inode)->lock);
			
 
				-			}
			
 
				 			if (only_release_metadata) {
			
 
				 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
			
 
				 								release_bytes);
			
@@ -1756,10 +1747,11 @@ again:
 
				 		if (copied > 0)
			
 
				 			ret = btrfs_dirty_pages(inode, pages, dirty_pages,
			
 
				 						pos, copied, NULL);
			
 
				-		if (need_unlock)
			
 
				+		if (extents_locked)
			
 
				 			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
			
 
				 					     lockstart, lockend, &cached_state,
			
 
				 					     GFP_NOFS);
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
			
 
				 		if (ret) {
			
 
				 			btrfs_drop_pages(pages, num_pages);
			
 
				 			break;
			
@@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
				 	struct btrfs_trans_handle *trans;
			
 
				 	struct btrfs_log_ctx ctx;
			
 
				 	int ret = 0, err;
			
 
				-	bool full_sync = 0;
			
 
				+	bool full_sync = false;
			
 
				 	u64 len;
			
 
				 
			
 
				 	/*
			
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
 
				 					struct btrfs_block_group_cache *block_group,
			
 
				 					struct btrfs_path *path)
			
 
				 {
			
 
				-	u64 start, end;
			
 
				 	int ret;
			
 
				 
			
 
				-	start = block_group->key.objectid;
			
 
				-	end = block_group->key.objectid + block_group->key.offset;
			
 
				-
			
 
				 	block_group->needs_free_space = 0;
			
 
				 
			
 
				 	ret = add_new_free_space_info(trans, fs_info, block_group, path);
			
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -500,11 +500,12 @@ again:
 
				 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
			
 
				 					      prealloc, prealloc, &alloc_hint);
			
 
				 	if (ret) {
			
 
				-		btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
			
 
				 		goto out_put;
			
 
				 	}
			
 
				 
			
 
				 	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
			
 
				 out_put:
			
 
				 	iput(inode);
			
 
				 out_release:
			
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -42,6 +42,7 @@
 
				 #include <linux/blkdev.h>
			
 
				 #include <linux/posix_acl_xattr.h>
			
 
				 #include <linux/uio.h>
			
 
				+#include <linux/magic.h>
			
 
				 #include "ctree.h"
			
 
				 #include "disk-io.h"
			
 
				 #include "transaction.h"
			
@@ -67,7 +68,6 @@ struct btrfs_iget_args {
 
				 };
			
 
				 
			
 
				 struct btrfs_dio_data {
			
 
				-	u64 outstanding_extents;
			
 
				 	u64 reserve;
			
 
				 	u64 unsubmitted_oe_range_start;
			
 
				 	u64 unsubmitted_oe_range_end;
			
@@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 
				 		btrfs_free_path(path);
			
 
				 		return PTR_ERR(trans);
			
 
				 	}
			
 
				-	trans->block_rsv = &fs_info->delalloc_block_rsv;
			
 
				+	trans->block_rsv = &BTRFS_I(inode)->block_rsv;
			
 
				 
			
 
				 	if (compressed_size && compressed_pages)
			
 
				 		extent_item_size = btrfs_file_extent_calc_inline_size(
			
@@ -348,7 +348,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 
				 	}
			
 
				 
			
 
				 	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
			
 
				-	btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start);
			
 
				 	btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
			
 
				 out:
			
 
				 	/*
			
@@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode,
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
			
 
				 	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				-	u64 num_bytes;
			
 
				 	u64 blocksize = fs_info->sectorsize;
			
 
				 	u64 actual_end;
			
 
				 	u64 isize = i_size_read(inode);
			
@@ -508,8 +506,6 @@ again:
 
				 
			
 
				 	total_compressed = min_t(unsigned long, total_compressed,
			
 
				 			BTRFS_MAX_UNCOMPRESSED);
			
 
				-	num_bytes = ALIGN(end - start + 1, blocksize);
			
 
				-	num_bytes = max(blocksize,  num_bytes);
			
 
				 	total_in = 0;
			
 
				 	ret = 0;
			
 
				 
			
@@ -542,7 +538,10 @@ again:
 
				 		 */
			
 
				 		extent_range_clear_dirty_for_io(inode, start, end);
			
 
				 		redirty = 1;
			
 
				-		ret = btrfs_compress_pages(compress_type,
			
 
				+
			
 
				+		/* Compression level is applied here and only here */
			
 
				+		ret = btrfs_compress_pages(
			
 
				+			compress_type | (fs_info->compress_level << 4),
			
 
				 					   inode->i_mapping, start,
			
 
				 					   pages,
			
 
				 					   &nr_pages,
			
@@ -570,7 +569,7 @@ again:
 
				 cont:
			
 
				 	if (start == 0) {
			
 
				 		/* lets try to make an inline extent */
			
 
				-		if (ret || total_in < (actual_end - start)) {
			
 
				+		if (ret || total_in < actual_end) {
			
 
				 			/* we didn't compress the entire range, try
			
 
				 			 * to make an uncompressed inline extent.
			
 
				 			 */
			
@@ -584,16 +583,21 @@ cont:
 
				 		}
			
 
				 		if (ret <= 0) {
			
 
				 			unsigned long clear_flags = EXTENT_DELALLOC |
			
 
				-				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
			
 
				+				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			
 
				+				EXTENT_DO_ACCOUNTING;
			
 
				 			unsigned long page_error_op;
			
 
				 
			
 
				-			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
			
 
				 			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
			
 
				 
			
 
				 			/*
			
 
				 			 * inline extent creation worked or returned error,
			
 
				 			 * we don't need to create any more async work items.
			
 
				 			 * Unlock and free up our temp pages.
			
 
				+			 *
			
 
				+			 * We use DO_ACCOUNTING here because we need the
			
 
				+			 * delalloc_release_metadata to be done _after_ we drop
			
 
				+			 * our outstanding extent for clearing delalloc for this
			
 
				+			 * range.
			
 
				 			 */
			
 
				 			extent_clear_unlock_delalloc(inode, start, end, end,
			
 
				 						     NULL, clear_flags,
			
@@ -602,10 +606,6 @@ cont:
 
				 						     PAGE_SET_WRITEBACK |
			
 
				 						     page_error_op |
			
 
				 						     PAGE_END_WRITEBACK);
			
 
				-			if (ret == 0)
			
 
				-				btrfs_free_reserved_data_space_noquota(inode,
			
 
				-							       start,
			
 
				-							       end - start + 1);
			
 
				 			goto free_pages_out;
			
 
				 		}
			
 
				 	}
			
@@ -625,7 +625,6 @@ cont:
 
				 		 */
			
 
				 		total_in = ALIGN(total_in, PAGE_SIZE);
			
 
				 		if (total_compressed + blocksize <= total_in) {
			
 
				-			num_bytes = total_in;
			
 
				 			*num_added += 1;
			
 
				 
			
 
				 			/*
			
@@ -633,12 +632,12 @@ cont:
 
				 			 * allocation on disk for these compressed pages, and
			
 
				 			 * will submit them to the elevator.
			
 
				 			 */
			
 
				-			add_async_extent(async_cow, start, num_bytes,
			
 
				+			add_async_extent(async_cow, start, total_in,
			
 
				 					total_compressed, pages, nr_pages,
			
 
				 					compress_type);
			
 
				 
			
 
				-			if (start + num_bytes < end) {
			
 
				-				start += num_bytes;
			
 
				+			if (start + total_in < end) {
			
 
				+				start += total_in;
			
 
				 				pages = NULL;
			
 
				 				cond_resched();
			
 
				 				goto again;
			
@@ -982,15 +981,19 @@ static noinline int cow_file_range(struct inode *inode,
 
				 		ret = cow_file_range_inline(root, inode, start, end, 0,
			
 
				 					BTRFS_COMPRESS_NONE, NULL);
			
 
				 		if (ret == 0) {
			
 
				+			/*
			
 
				+			 * We use DO_ACCOUNTING here because we need the
			
 
				+			 * delalloc_release_metadata to be run _after_ we drop
			
 
				+			 * our outstanding extent for clearing delalloc for this
			
 
				+			 * range.
			
 
				+			 */
			
 
				 			extent_clear_unlock_delalloc(inode, start, end,
			
 
				 				     delalloc_end, NULL,
			
 
				 				     EXTENT_LOCKED | EXTENT_DELALLOC |
			
 
				-				     EXTENT_DELALLOC_NEW |
			
 
				-				     EXTENT_DEFRAG, PAGE_UNLOCK |
			
 
				+				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			
 
				+				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
			
 
				 				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
			
 
				 				     PAGE_END_WRITEBACK);
			
 
				-			btrfs_free_reserved_data_space_noquota(inode, start,
			
 
				-						end - start + 1);
			
 
				 			*nr_written = *nr_written +
			
 
				 			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			
 
				 			*page_started = 1;
			
@@ -1226,13 +1229,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 
				 
			
 
				 		btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
			
 
				 
			
 
				-		while (atomic_read(&fs_info->async_submit_draining) &&
			
 
				-		       atomic_read(&fs_info->async_delalloc_pages)) {
			
 
				-			wait_event(fs_info->async_submit_wait,
			
 
				-				   (atomic_read(&fs_info->async_delalloc_pages) ==
			
 
				-				    0));
			
 
				-		}
			
 
				-
			
 
				 		*nr_written += nr_pages;
			
 
				 		start = cur_end + 1;
			
 
				 	}
			
@@ -1635,7 +1631,7 @@ static void btrfs_split_extent_hook(void *private_data,
 
				 	}
			
 
				 
			
 
				 	spin_lock(&BTRFS_I(inode)->lock);
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				+	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
			
 
				 	spin_unlock(&BTRFS_I(inode)->lock);
			
 
				 }
			
 
				 
			
@@ -1665,7 +1661,7 @@ static void btrfs_merge_extent_hook(void *private_data,
 
				 	/* we're not bigger than the max, unreserve the space and go */
			
 
				 	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
			
 
				 		spin_lock(&BTRFS_I(inode)->lock);
			
 
				-		BTRFS_I(inode)->outstanding_extents--;
			
 
				+		btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
			
 
				 		spin_unlock(&BTRFS_I(inode)->lock);
			
 
				 		return;
			
 
				 	}
			
@@ -1696,7 +1692,7 @@ static void btrfs_merge_extent_hook(void *private_data,
 
				 		return;
			
 
				 
			
 
				 	spin_lock(&BTRFS_I(inode)->lock);
			
 
				-	BTRFS_I(inode)->outstanding_extents--;
			
 
				+	btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
			
 
				 	spin_unlock(&BTRFS_I(inode)->lock);
			
 
				 }
			
 
				 
			
@@ -1766,15 +1762,12 @@ static void btrfs_set_bit_hook(void *private_data,
 
				 	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
			
 
				 		struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				 		u64 len = state->end + 1 - state->start;
			
 
				+		u32 num_extents = count_max_extents(len);
			
 
				 		bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
			
 
				 
			
 
				-		if (*bits & EXTENT_FIRST_DELALLOC) {
			
 
				-			*bits &= ~EXTENT_FIRST_DELALLOC;
			
 
				-		} else {
			
 
				-			spin_lock(&BTRFS_I(inode)->lock);
			
 
				-			BTRFS_I(inode)->outstanding_extents++;
			
 
				-			spin_unlock(&BTRFS_I(inode)->lock);
			
 
				-		}
			
 
				+		spin_lock(&BTRFS_I(inode)->lock);
			
 
				+		btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
			
 
				+		spin_unlock(&BTRFS_I(inode)->lock);
			
 
				 
			
 
				 		/* For sanity tests */
			
 
				 		if (btrfs_is_testing(fs_info))
			
@@ -1828,13 +1821,9 @@ static void btrfs_clear_bit_hook(void *private_data,
 
				 		struct btrfs_root *root = inode->root;
			
 
				 		bool do_list = !btrfs_is_free_space_inode(inode);
			
 
				 
			
 
				-		if (*bits & EXTENT_FIRST_DELALLOC) {
			
 
				-			*bits &= ~EXTENT_FIRST_DELALLOC;
			
 
				-		} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
			
 
				-			spin_lock(&inode->lock);
			
 
				-			inode->outstanding_extents -= num_extents;
			
 
				-			spin_unlock(&inode->lock);
			
 
				-		}
			
 
				+		spin_lock(&inode->lock);
			
 
				+		btrfs_mod_outstanding_extents(inode, -num_extents);
			
 
				+		spin_unlock(&inode->lock);
			
 
				 
			
 
				 		/*
			
 
				 		 * We don't reserve metadata space for space cache inodes so we
			
@@ -2105,6 +2094,7 @@ again:
 
				 				  0);
			
 
				 	ClearPageChecked(page);
			
 
				 	set_page_dirty(page);
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
			
 
				 out:
			
 
				 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
			
 
				 			     &cached_state, GFP_NOFS);
			
@@ -2229,8 +2219,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 
				 	if (ret < 0)
			
 
				 		goto out;
			
 
				 	qg_released = ret;
			
 
				-	ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
			
 
				-			btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins);
			
 
				+	ret = btrfs_alloc_reserved_file_extent(trans, root,
			
 
				+					       btrfs_ino(BTRFS_I(inode)),
			
 
				+					       file_pos, qg_released, &ins);
			
 
				 out:
			
 
				 	btrfs_free_path(path);
			
 
				 
			
@@ -2464,7 +2455,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
 
				 		ret = iterate_inodes_from_logical(old->bytenr +
			
 
				 						  old->extent_offset, fs_info,
			
 
				 						  path, record_one_backref,
			
 
				-						  old);
			
 
				+						  old, false);
			
 
				 		if (ret < 0 && ret != -ENOENT)
			
 
				 			return false;
			
 
				 
			
@@ -2682,7 +2673,7 @@ again:
 
				 	inode_add_bytes(inode, len);
			
 
				 	btrfs_release_path(path);
			
 
				 
			
 
				-	ret = btrfs_inc_extent_ref(trans, fs_info, new->bytenr,
			
 
				+	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
			
 
				 			new->disk_len, 0,
			
 
				 			backref->root_id, backref->inum,
			
 
				 			new->file_pos);	/* start - extent_offset */
			
@@ -2964,7 +2955,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
				 			trans = NULL;
			
 
				 			goto out;
			
 
				 		}
			
 
				-		trans->block_rsv = &fs_info->delalloc_block_rsv;
			
 
				+		trans->block_rsv = &BTRFS_I(inode)->block_rsv;
			
 
				 		ret = btrfs_update_inode_fallback(trans, root, inode);
			
 
				 		if (ret) /* -ENOMEM or corruption */
			
 
				 			btrfs_abort_transaction(trans, ret);
			
@@ -3000,7 +2991,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	trans->block_rsv = &fs_info->delalloc_block_rsv;
			
 
				+	trans->block_rsv = &BTRFS_I(inode)->block_rsv;
			
 
				 
			
 
				 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
			
 
				 		compress_type = ordered_extent->compress_type;
			
@@ -3058,9 +3049,6 @@ out:
 
				 				 0, &cached_state, GFP_NOFS);
			
 
				 	}
			
 
				 
			
 
				-	if (root != fs_info->tree_root)
			
 
				-		btrfs_delalloc_release_metadata(BTRFS_I(inode),
			
 
				-				ordered_extent->len);
			
 
				 	if (trans)
			
 
				 		btrfs_end_transaction(trans);
			
 
				 
			
@@ -4372,47 +4360,11 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
 
				 
			
 
				 }
			
 
				 
			
 
				-static int truncate_inline_extent(struct inode *inode,
			
 
				-				  struct btrfs_path *path,
			
 
				-				  struct btrfs_key *found_key,
			
 
				-				  const u64 item_end,
			
 
				-				  const u64 new_size)
			
 
				-{
			
 
				-	struct extent_buffer *leaf = path->nodes[0];
			
 
				-	int slot = path->slots[0];
			
 
				-	struct btrfs_file_extent_item *fi;
			
 
				-	u32 size = (u32)(new_size - found_key->offset);
			
 
				-	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				-
			
 
				-	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
			
 
				-
			
 
				-	if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
			
 
				-		loff_t offset = new_size;
			
 
				-		loff_t page_end = ALIGN(offset, PAGE_SIZE);
			
 
				-
			
 
				-		/*
			
 
				-		 * Zero out the remaining of the last page of our inline extent,
			
 
				-		 * instead of directly truncating our inline extent here - that
			
 
				-		 * would be much more complex (decompressing all the data, then
			
 
				-		 * compressing the truncated data, which might be bigger than
			
 
				-		 * the size of the inline extent, resize the extent, etc).
			
 
				-		 * We release the path because to get the page we might need to
			
 
				-		 * read the extent item from disk (data not in the page cache).
			
 
				-		 */
			
 
				-		btrfs_release_path(path);
			
 
				-		return btrfs_truncate_block(inode, offset, page_end - offset,
			
 
				-					0);
			
 
				-	}
			
 
				-
			
 
				-	btrfs_set_file_extent_ram_bytes(leaf, fi, size);
			
 
				-	size = btrfs_file_extent_calc_inline_size(size);
			
 
				-	btrfs_truncate_item(root->fs_info, path, size, 1);
			
 
				-
			
 
				-	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
			
 
				-		inode_sub_bytes(inode, item_end + 1 - new_size);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				+/*
			
 
				+ * Return this if we need to call truncate_block for the last bit of the
			
 
				+ * truncate.
			
 
				+ */
			
 
				+#define NEED_TRUNCATE_BLOCK 1
			
 
				 
			
 
				 /*
			
 
				  * this can truncate away extent items, csum items and directory items.
			
@@ -4451,9 +4403,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
				 	int err = 0;
			
 
				 	u64 ino = btrfs_ino(BTRFS_I(inode));
			
 
				 	u64 bytes_deleted = 0;
			
 
				-	bool be_nice = 0;
			
 
				-	bool should_throttle = 0;
			
 
				-	bool should_end = 0;
			
 
				+	bool be_nice = false;
			
 
				+	bool should_throttle = false;
			
 
				+	bool should_end = false;
			
 
				 
			
 
				 	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
			
 
				 
			
@@ -4463,7 +4415,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
				 	 */
			
 
				 	if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
			
 
				 	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
			
 
				-		be_nice = 1;
			
 
				+		be_nice = true;
			
 
				 
			
 
				 	path = btrfs_alloc_path();
			
 
				 	if (!path)
			
@@ -4573,11 +4525,6 @@ search_again:
 
				 		if (found_type != BTRFS_EXTENT_DATA_KEY)
			
 
				 			goto delete;
			
 
				 
			
 
				-		if (del_item)
			
 
				-			last_size = found_key.offset;
			
 
				-		else
			
 
				-			last_size = new_size;
			
 
				-
			
 
				 		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			
 
				 			u64 num_dec;
			
 
				 			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			
@@ -4619,40 +4566,30 @@ search_again:
 
				 			 */
			
 
				 			if (!del_item &&
			
 
				 			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			
 
				-			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
			
 
				-
			
 
				+			    btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
			
 
				+			    btrfs_file_extent_compression(leaf, fi) == 0) {
			
 
				+				u32 size = (u32)(new_size - found_key.offset);
			
 
				+
			
 
				+				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
			
 
				+				size = btrfs_file_extent_calc_inline_size(size);
			
 
				+				btrfs_truncate_item(root->fs_info, path, size, 1);
			
 
				+			} else if (!del_item) {
			
 
				 				/*
			
 
				-				 * Need to release path in order to truncate a
			
 
				-				 * compressed extent. So delete any accumulated
			
 
				-				 * extent items so far.
			
 
				+				 * We have to bail so the last_size is set to
			
 
				+				 * just before this extent.
			
 
				 				 */
			
 
				-				if (btrfs_file_extent_compression(leaf, fi) !=
			
 
				-				    BTRFS_COMPRESS_NONE && pending_del_nr) {
			
 
				-					err = btrfs_del_items(trans, root, path,
			
 
				-							      pending_del_slot,
			
 
				-							      pending_del_nr);
			
 
				-					if (err) {
			
 
				-						btrfs_abort_transaction(trans,
			
 
				-									err);
			
 
				-						goto error;
			
 
				-					}
			
 
				-					pending_del_nr = 0;
			
 
				-				}
			
 
				+				err = NEED_TRUNCATE_BLOCK;
			
 
				+				break;
			
 
				+			}
			
 
				 
			
 
				-				err = truncate_inline_extent(inode, path,
			
 
				-							     &found_key,
			
 
				-							     item_end,
			
 
				-							     new_size);
			
 
				-				if (err) {
			
 
				-					btrfs_abort_transaction(trans, err);
			
 
				-					goto error;
			
 
				-				}
			
 
				-			} else if (test_bit(BTRFS_ROOT_REF_COWS,
			
 
				-					    &root->state)) {
			
 
				+			if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
			
 
				 				inode_sub_bytes(inode, item_end + 1 - new_size);
			
 
				-			}
			
 
				 		}
			
 
				 delete:
			
 
				+		if (del_item)
			
 
				+			last_size = found_key.offset;
			
 
				+		else
			
 
				+			last_size = new_size;
			
 
				 		if (del_item) {
			
 
				 			if (!pending_del_nr) {
			
 
				 				/* no pending yet, add ourselves */
			
@@ -4669,14 +4606,14 @@ delete:
 
				 		} else {
			
 
				 			break;
			
 
				 		}
			
 
				-		should_throttle = 0;
			
 
				+		should_throttle = false;
			
 
				 
			
 
				 		if (found_extent &&
			
 
				 		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
			
 
				 		     root == fs_info->tree_root)) {
			
 
				 			btrfs_set_path_blocking(path);
			
 
				 			bytes_deleted += extent_num_bytes;
			
 
				-			ret = btrfs_free_extent(trans, fs_info, extent_start,
			
 
				+			ret = btrfs_free_extent(trans, root, extent_start,
			
 
				 						extent_num_bytes, 0,
			
 
				 						btrfs_header_owner(leaf),
			
 
				 						ino, extent_offset);
			
@@ -4688,11 +4625,11 @@ delete:
 
				 			if (be_nice) {
			
 
				 				if (truncate_space_check(trans, root,
			
 
				 							 extent_num_bytes)) {
			
 
				-					should_end = 1;
			
 
				+					should_end = true;
			
 
				 				}
			
 
				 				if (btrfs_should_throttle_delayed_refs(trans,
			
 
				 								       fs_info))
			
 
				-					should_throttle = 1;
			
 
				+					should_throttle = true;
			
 
				 			}
			
 
				 		}
			
 
				 
			
@@ -4801,8 +4738,11 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 
				 	    (!len || ((len & (blocksize - 1)) == 0)))
			
 
				 		goto out;
			
 
				 
			
 
				+	block_start = round_down(from, blocksize);
			
 
				+	block_end = block_start + blocksize - 1;
			
 
				+
			
 
				 	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
			
 
				-			round_down(from, blocksize), blocksize);
			
 
				+					   block_start, blocksize);
			
 
				 	if (ret)
			
 
				 		goto out;
			
 
				 
			
@@ -4810,15 +4750,12 @@ again:
 
				 	page = find_or_create_page(mapping, index, mask);
			
 
				 	if (!page) {
			
 
				 		btrfs_delalloc_release_space(inode, data_reserved,
			
 
				-				round_down(from, blocksize),
			
 
				-				blocksize);
			
 
				+					     block_start, blocksize);
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
			
 
				 		ret = -ENOMEM;
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	block_start = round_down(from, blocksize);
			
 
				-	block_end = block_start + blocksize - 1;
			
 
				-
			
 
				 	if (!PageUptodate(page)) {
			
 
				 		ret = btrfs_readpage(NULL, page);
			
 
				 		lock_page(page);
			
@@ -4883,6 +4820,7 @@ out_unlock:
 
				 	if (ret)
			
 
				 		btrfs_delalloc_release_space(inode, data_reserved, block_start,
			
 
				 					     blocksize);
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
			
 
				 	unlock_page(page);
			
 
				 	put_page(page);
			
 
				 out:
			
@@ -7797,33 +7735,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 
				 	return em;
			
 
				 }
			
 
				 
			
 
				-static void adjust_dio_outstanding_extents(struct inode *inode,
			
 
				-					   struct btrfs_dio_data *dio_data,
			
 
				-					   const u64 len)
			
 
				-{
			
 
				-	unsigned num_extents = count_max_extents(len);
			
 
				-
			
 
				-	/*
			
 
				-	 * If we have an outstanding_extents count still set then we're
			
 
				-	 * within our reservation, otherwise we need to adjust our inode
			
 
				-	 * counter appropriately.
			
 
				-	 */
			
 
				-	if (dio_data->outstanding_extents >= num_extents) {
			
 
				-		dio_data->outstanding_extents -= num_extents;
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * If dio write length has been split due to no large enough
			
 
				-		 * contiguous space, we need to compensate our inode counter
			
 
				-		 * appropriately.
			
 
				-		 */
			
 
				-		u64 num_needed = num_extents - dio_data->outstanding_extents;
			
 
				-
			
 
				-		spin_lock(&BTRFS_I(inode)->lock);
			
 
				-		BTRFS_I(inode)->outstanding_extents += num_needed;
			
 
				-		spin_unlock(&BTRFS_I(inode)->lock);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
			
 
				 				   struct buffer_head *bh_result, int create)
			
 
				 {
			
@@ -7985,7 +7896,6 @@ unlock:
 
				 		if (!dio_data->overwrite && start + len > i_size_read(inode))
			
 
				 			i_size_write(inode, start + len);
			
 
				 
			
 
				-		adjust_dio_outstanding_extents(inode, dio_data, len);
			
 
				 		WARN_ON(dio_data->reserve < len);
			
 
				 		dio_data->reserve -= len;
			
 
				 		dio_data->unsubmitted_oe_range_end = start + len;
			
@@ -8015,14 +7925,6 @@ unlock_err:
 
				 err:
			
 
				 	if (dio_data)
			
 
				 		current->journal_info = dio_data;
			
 
				-	/*
			
 
				-	 * Compensate the delalloc release we do in btrfs_direct_IO() when we
			
 
				-	 * write less data then expected, so that we don't underflow our inode's
			
 
				-	 * outstanding extents counter.
			
 
				-	 */
			
 
				-	if (create && dio_data)
			
 
				-		adjust_dio_outstanding_extents(inode, dio_data, len);
			
 
				-
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -8495,7 +8397,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 
				 	if (dip->errors) {
			
 
				 		bio_io_error(dip->orig_bio);
			
 
				 	} else {
			
 
				-		dip->dio_bio->bi_status = 0;
			
 
				+		dip->dio_bio->bi_status = BLK_STS_OK;
			
 
				 		bio_endio(dip->orig_bio);
			
 
				 	}
			
 
				 out:
			
@@ -8577,7 +8479,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
 
				 			goto err;
			
 
				 	}
			
 
				 map:
			
 
				-	ret = btrfs_map_bio(fs_info, bio, 0, async_submit);
			
 
				+	ret = btrfs_map_bio(fs_info, bio, 0, 0);
			
 
				 err:
			
 
				 	bio_put(bio);
			
 
				 	return ret;
			
@@ -8786,7 +8688,6 @@ free_ordered:
 
				 }
			
 
				 
			
 
				 static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
			
 
				-			       struct kiocb *iocb,
			
 
				 			       const struct iov_iter *iter, loff_t offset)
			
 
				 {
			
 
				 	int seg;
			
@@ -8833,7 +8734,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
				 	bool relock = false;
			
 
				 	ssize_t ret;
			
 
				 
			
 
				-	if (check_direct_IO(fs_info, iocb, iter, offset))
			
 
				+	if (check_direct_IO(fs_info, iter, offset))
			
 
				 		return 0;
			
 
				 
			
 
				 	inode_dio_begin(inode);
			
@@ -8868,7 +8769,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
				 						   offset, count);
			
 
				 		if (ret)
			
 
				 			goto out;
			
 
				-		dio_data.outstanding_extents = count_max_extents(count);
			
 
				 
			
 
				 		/*
			
 
				 		 * We need to know how many extents we reserved so that we can
			
@@ -8915,6 +8815,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
				 		} else if (ret >= 0 && (size_t)ret < count)
			
 
				 			btrfs_delalloc_release_space(inode, data_reserved,
			
 
				 					offset, count - (size_t)ret);
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), count);
			
 
				 	}
			
 
				 out:
			
 
				 	if (wakeup)
			
@@ -9232,9 +9133,6 @@ again:
 
				 					  fs_info->sectorsize);
			
 
				 		if (reserved_space < PAGE_SIZE) {
			
 
				 			end = page_start + reserved_space - 1;
			
 
				-			spin_lock(&BTRFS_I(inode)->lock);
			
 
				-			BTRFS_I(inode)->outstanding_extents++;
			
 
				-			spin_unlock(&BTRFS_I(inode)->lock);
			
 
				 			btrfs_delalloc_release_space(inode, data_reserved,
			
 
				 					page_start, PAGE_SIZE - reserved_space);
			
 
				 		}
			
@@ -9286,12 +9184,14 @@ again:
 
				 
			
 
				 out_unlock:
			
 
				 	if (!ret) {
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
			
 
				 		sb_end_pagefault(inode->i_sb);
			
 
				 		extent_changeset_free(data_reserved);
			
 
				 		return VM_FAULT_LOCKED;
			
 
				 	}
			
 
				 	unlock_page(page);
			
 
				 out:
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
			
 
				 	btrfs_delalloc_release_space(inode, data_reserved, page_start,
			
 
				 				     reserved_space);
			
 
				 out_noreserve:
			
@@ -9387,12 +9287,12 @@ static int btrfs_truncate(struct inode *inode)
 
				 		ret = btrfs_truncate_inode_items(trans, root, inode,
			
 
				 						 inode->i_size,
			
 
				 						 BTRFS_EXTENT_DATA_KEY);
			
 
				+		trans->block_rsv = &fs_info->trans_block_rsv;
			
 
				 		if (ret != -ENOSPC && ret != -EAGAIN) {
			
 
				 			err = ret;
			
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		trans->block_rsv = &fs_info->trans_block_rsv;
			
 
				 		ret = btrfs_update_inode(trans, root, inode);
			
 
				 		if (ret) {
			
 
				 			err = ret;
			
@@ -9416,6 +9316,27 @@ static int btrfs_truncate(struct inode *inode)
 
				 		trans->block_rsv = rsv;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * We can't call btrfs_truncate_block inside a trans handle as we could
			
 
				+	 * deadlock with freeze, if we got NEED_TRUNCATE_BLOCK then we know
			
 
				+	 * we've truncated everything except the last little bit, and can do
			
 
				+	 * btrfs_truncate_block and then update the disk_i_size.
			
 
				+	 */
			
 
				+	if (ret == NEED_TRUNCATE_BLOCK) {
			
 
				+		btrfs_end_transaction(trans);
			
 
				+		btrfs_btree_balance_dirty(fs_info);
			
 
				+
			
 
				+		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
			
 
				+		if (ret)
			
 
				+			goto out;
			
 
				+		trans = btrfs_start_transaction(root, 1);
			
 
				+		if (IS_ERR(trans)) {
			
 
				+			ret = PTR_ERR(trans);
			
 
				+			goto out;
			
 
				+		}
			
 
				+		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
			
 
				+	}
			
 
				+
			
 
				 	if (ret == 0 && inode->i_nlink > 0) {
			
 
				 		trans->block_rsv = root->orphan_block_rsv;
			
 
				 		ret = btrfs_orphan_del(trans, BTRFS_I(inode));
			
@@ -9480,6 +9401,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 
				 
			
 
				 struct inode *btrfs_alloc_inode(struct super_block *sb)
			
 
				 {
			
 
				+	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
			
 
				 	struct btrfs_inode *ei;
			
 
				 	struct inode *inode;
			
 
				 
			
@@ -9506,8 +9428,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 
				 
			
 
				 	spin_lock_init(&ei->lock);
			
 
				 	ei->outstanding_extents = 0;
			
 
				-	ei->reserved_extents = 0;
			
 
				-
			
 
				+	if (sb->s_magic != BTRFS_TEST_MAGIC)
			
 
				+		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
			
 
				+					      BTRFS_BLOCK_RSV_DELALLOC);
			
 
				 	ei->runtime_flags = 0;
			
 
				 	ei->prop_compress = BTRFS_COMPRESS_NONE;
			
 
				 	ei->defrag_compress = BTRFS_COMPRESS_NONE;
			
@@ -9557,8 +9480,9 @@ void btrfs_destroy_inode(struct inode *inode)
 
				 
			
 
				 	WARN_ON(!hlist_empty(&inode->i_dentry));
			
 
				 	WARN_ON(inode->i_data.nrpages);
			
 
				+	WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
			
 
				+	WARN_ON(BTRFS_I(inode)->block_rsv.size);
			
 
				 	WARN_ON(BTRFS_I(inode)->outstanding_extents);
			
 
				-	WARN_ON(BTRFS_I(inode)->reserved_extents);
			
 
				 	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
			
 
				 	WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
			
 
				 	WARN_ON(BTRFS_I(inode)->csum_bytes);
			
@@ -10337,19 +10261,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 
				 	ret = __start_delalloc_inodes(root, delay_iput, -1);
			
 
				 	if (ret > 0)
			
 
				 		ret = 0;
			
 
				-	/*
			
 
				-	 * the filemap_flush will queue IO into the worker threads, but
			
 
				-	 * we have to make sure the IO is actually started and that
			
 
				-	 * ordered extents get created before we return
			
 
				-	 */
			
 
				-	atomic_inc(&fs_info->async_submit_draining);
			
 
				-	while (atomic_read(&fs_info->nr_async_submits) ||
			
 
				-	       atomic_read(&fs_info->async_delalloc_pages)) {
			
 
				-		wait_event(fs_info->async_submit_wait,
			
 
				-			   (atomic_read(&fs_info->nr_async_submits) == 0 &&
			
 
				-			    atomic_read(&fs_info->async_delalloc_pages) == 0));
			
 
				-	}
			
 
				-	atomic_dec(&fs_info->async_submit_draining);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -10391,14 +10302,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
 
				 	spin_unlock(&fs_info->delalloc_root_lock);
			
 
				 
			
 
				 	ret = 0;
			
 
				-	atomic_inc(&fs_info->async_submit_draining);
			
 
				-	while (atomic_read(&fs_info->nr_async_submits) ||
			
 
				-	      atomic_read(&fs_info->async_delalloc_pages)) {
			
 
				-		wait_event(fs_info->async_submit_wait,
			
 
				-		   (atomic_read(&fs_info->nr_async_submits) == 0 &&
			
 
				-		    atomic_read(&fs_info->async_delalloc_pages) == 0));
			
 
				-	}
			
 
				-	atomic_dec(&fs_info->async_submit_draining);
			
 
				 out:
			
 
				 	if (!list_empty_careful(&splice)) {
			
 
				 		spin_lock(&fs_info->delalloc_root_lock);
			
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 {
 
				 				struct btrfs_ioctl_received_subvol_args_32)
			
 
				 #endif
			
 
				 
			
 
				+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
			
 
				+struct btrfs_ioctl_send_args_32 {
			
 
				+	__s64 send_fd;			/* in */
			
 
				+	__u64 clone_sources_count;	/* in */
			
 
				+	compat_uptr_t clone_sources;	/* in */
			
 
				+	__u64 parent_root;		/* in */
			
 
				+	__u64 flags;			/* in */
			
 
				+	__u64 reserved[4];		/* in */
			
 
				+} __attribute__ ((__packed__));
			
 
				+
			
 
				+#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			
 
				+			       struct btrfs_ioctl_send_args_32)
			
 
				+#endif
			
 
				 
			
 
				 static int btrfs_clone(struct inode *src, struct inode *inode,
			
 
				 		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
			
@@ -609,23 +622,6 @@ fail_free:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root)
			
 
				-{
			
 
				-	s64 writers;
			
 
				-	DEFINE_WAIT(wait);
			
 
				-
			
 
				-	do {
			
 
				-		prepare_to_wait(&root->subv_writers->wait, &wait,
			
 
				-				TASK_UNINTERRUPTIBLE);
			
 
				-
			
 
				-		writers = percpu_counter_sum(&root->subv_writers->counter);
			
 
				-		if (writers)
			
 
				-			schedule();
			
 
				-
			
 
				-		finish_wait(&root->subv_writers->wait, &wait);
			
 
				-	} while (writers);
			
 
				-}
			
 
				-
			
 
				 static int create_snapshot(struct btrfs_root *root, struct inode *dir,
			
 
				 			   struct dentry *dentry,
			
 
				 			   u64 *async_transid, bool readonly,
			
@@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 
				 
			
 
				 	atomic_inc(&root->will_be_snapshotted);
			
 
				 	smp_mb__after_atomic();
			
 
				-	btrfs_wait_for_no_snapshotting_writes(root);
			
 
				+	/* wait for no snapshot writes */
			
 
				+	wait_event(root->subv_writers->wait,
			
 
				+		   percpu_counter_sum(&root->subv_writers->counter) == 0);
			
 
				 
			
 
				 	ret = btrfs_start_delalloc_inodes(root, 0);
			
 
				 	if (ret)
			
@@ -1219,6 +1217,7 @@ again:
 
				 		unlock_page(pages[i]);
			
 
				 		put_page(pages[i]);
			
 
				 	}
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
			
 
				 	extent_changeset_free(data_reserved);
			
 
				 	return i_done;
			
 
				 out:
			
@@ -1229,6 +1228,7 @@ out:
 
				 	btrfs_delalloc_release_space(inode, data_reserved,
			
 
				 			start_index << PAGE_SHIFT,
			
 
				 			page_cnt << PAGE_SHIFT);
			
 
				+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
			
 
				 	extent_changeset_free(data_reserved);
			
 
				 	return ret;
			
 
				 
			
@@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
				 			filemap_flush(inode->i_mapping);
			
 
				 	}
			
 
				 
			
 
				-	if (do_compress) {
			
 
				-		/* the filemap_flush will queue IO into the worker threads, but
			
 
				-		 * we have to make sure the IO is actually started and that
			
 
				-		 * ordered extents get created before we return
			
 
				-		 */
			
 
				-		atomic_inc(&fs_info->async_submit_draining);
			
 
				-		while (atomic_read(&fs_info->nr_async_submits) ||
			
 
				-		       atomic_read(&fs_info->async_delalloc_pages)) {
			
 
				-			wait_event(fs_info->async_submit_wait,
			
 
				-				   (atomic_read(&fs_info->nr_async_submits) == 0 &&
			
 
				-				    atomic_read(&fs_info->async_delalloc_pages) == 0));
			
 
				-		}
			
 
				-		atomic_dec(&fs_info->async_submit_draining);
			
 
				-	}
			
 
				-
			
 
				 	if (range->compress_type == BTRFS_COMPRESS_LZO) {
			
 
				 		btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
			
 
				 	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
			
@@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
 
				 
			
 
				 	ret = btrfs_update_root(trans, fs_info->tree_root,
			
 
				 				&root->root_key, &root->root_item);
			
 
				+	if (ret < 0) {
			
 
				+		btrfs_end_transaction(trans);
			
 
				+		goto out_reset;
			
 
				+	}
			
 
				+
			
 
				+	ret = btrfs_commit_transaction(trans);
			
 
				 
			
 
				-	btrfs_commit_transaction(trans);
			
 
				 out_reset:
			
 
				 	if (ret)
			
 
				 		btrfs_set_root_flags(&root->root_item, root_flags);
			
@@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
 
				 
			
 
				 	inode = file_inode(file);
			
 
				 	ret = search_ioctl(inode, &args.key, &buf_size,
			
 
				-			   (char *)(&uarg->buf[0]));
			
 
				+			   (char __user *)(&uarg->buf[0]));
			
 
				 	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
			
 
				 		ret = -EFAULT;
			
 
				 	else if (ret == -EOVERFLOW &&
			
@@ -3706,7 +3696,7 @@ process_slot:
 
				 				if (disko) {
			
 
				 					inode_add_bytes(inode, datal);
			
 
				 					ret = btrfs_inc_extent_ref(trans,
			
 
				-							fs_info,
			
 
				+							root,
			
 
				 							disko, diskl, 0,
			
 
				 							root->root_key.objectid,
			
 
				 							btrfs_ino(BTRFS_I(inode)),
			
@@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
 
				 	struct btrfs_ioctl_space_info *dest_orig;
			
 
				 	struct btrfs_ioctl_space_info __user *user_dest;
			
 
				 	struct btrfs_space_info *info;
			
 
				-	u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
			
 
				-		       BTRFS_BLOCK_GROUP_SYSTEM,
			
 
				-		       BTRFS_BLOCK_GROUP_METADATA,
			
 
				-		       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
			
 
				+	static const u64 types[] = {
			
 
				+		BTRFS_BLOCK_GROUP_DATA,
			
 
				+		BTRFS_BLOCK_GROUP_SYSTEM,
			
 
				+		BTRFS_BLOCK_GROUP_METADATA,
			
 
				+		BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
			
 
				+	};
			
 
				 	int num_types = 4;
			
 
				 	int alloc_size;
			
 
				 	int ret = 0;
			
@@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
 
				 		ipath->fspath->val[i] = rel_ptr;
			
 
				 	}
			
 
				 
			
 
				-	ret = copy_to_user((void *)(unsigned long)ipa->fspath,
			
 
				-			   (void *)(unsigned long)ipath->fspath, size);
			
 
				+	ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
			
 
				+			   ipath->fspath, size);
			
 
				 	if (ret) {
			
 
				 		ret = -EFAULT;
			
 
				 		goto out;
			
@@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
 
				 }
			
 
				 
			
 
				 static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
			
 
				-					void __user *arg)
			
 
				+					void __user *arg, int version)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	int size;
			
 
				 	struct btrfs_ioctl_logical_ino_args *loi;
			
 
				 	struct btrfs_data_container *inodes = NULL;
			
 
				 	struct btrfs_path *path = NULL;
			
 
				+	bool ignore_offset;
			
 
				 
			
 
				 	if (!capable(CAP_SYS_ADMIN))
			
 
				 		return -EPERM;
			
@@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
 
				 	if (IS_ERR(loi))
			
 
				 		return PTR_ERR(loi);
			
 
				 
			
 
				+	if (version == 1) {
			
 
				+		ignore_offset = false;
			
 
				+		size = min_t(u32, loi->size, SZ_64K);
			
 
				+	} else {
			
 
				+		/* All reserved bits must be 0 for now */
			
 
				+		if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
			
 
				+			ret = -EINVAL;
			
 
				+			goto out_loi;
			
 
				+		}
			
 
				+		/* Only accept flags we have defined so far */
			
 
				+		if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
			
 
				+			ret = -EINVAL;
			
 
				+			goto out_loi;
			
 
				+		}
			
 
				+		ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
			
 
				+		size = min_t(u32, loi->size, SZ_16M);
			
 
				+	}
			
 
				+
			
 
				 	path = btrfs_alloc_path();
			
 
				 	if (!path) {
			
 
				 		ret = -ENOMEM;
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	size = min_t(u32, loi->size, SZ_64K);
			
 
				 	inodes = init_data_container(size);
			
 
				 	if (IS_ERR(inodes)) {
			
 
				 		ret = PTR_ERR(inodes);
			
@@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
 
				 	}
			
 
				 
			
 
				 	ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
			
 
				-					  build_ino_list, inodes);
			
 
				+					  build_ino_list, inodes, ignore_offset);
			
 
				 	if (ret == -EINVAL)
			
 
				 		ret = -ENOENT;
			
 
				 	if (ret < 0)
			
 
				 		goto out;
			
 
				 
			
 
				-	ret = copy_to_user((void *)(unsigned long)loi->inodes,
			
 
				-			   (void *)(unsigned long)inodes, size);
			
 
				+	ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
			
 
				+			   size);
			
 
				 	if (ret)
			
 
				 		ret = -EFAULT;
			
 
				 
			
 
				 out:
			
 
				 	btrfs_free_path(path);
			
 
				 	kvfree(inodes);
			
 
				+out_loi:
			
 
				 	kfree(loi);
			
 
				 
			
 
				 	return ret;
			
@@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
 
				 					  root->root_key.objectid);
			
 
				 		if (ret < 0 && ret != -EEXIST) {
			
 
				 			btrfs_abort_transaction(trans, ret);
			
 
				+			btrfs_end_transaction(trans);
			
 
				 			goto out;
			
 
				 		}
			
 
				 	}
			
 
				 	ret = btrfs_commit_transaction(trans);
			
 
				-	if (ret < 0) {
			
 
				-		btrfs_abort_transaction(trans, ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				 out:
			
 
				 	up_write(&fs_info->subvol_sem);
			
 
				 	mnt_drop_write_file(file);
			
@@ -5490,6 +5497,41 @@ out_drop_write:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
			
 
				+{
			
 
				+	struct btrfs_ioctl_send_args *arg;
			
 
				+	int ret;
			
 
				+
			
 
				+	if (compat) {
			
 
				+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
			
 
				+		struct btrfs_ioctl_send_args_32 args32;
			
 
				+
			
 
				+		ret = copy_from_user(&args32, argp, sizeof(args32));
			
 
				+		if (ret)
			
 
				+			return -EFAULT;
			
 
				+		arg = kzalloc(sizeof(*arg), GFP_KERNEL);
			
 
				+		if (!arg)
			
 
				+			return -ENOMEM;
			
 
				+		arg->send_fd = args32.send_fd;
			
 
				+		arg->clone_sources_count = args32.clone_sources_count;
			
 
				+		arg->clone_sources = compat_ptr(args32.clone_sources);
			
 
				+		arg->parent_root = args32.parent_root;
			
 
				+		arg->flags = args32.flags;
			
 
				+		memcpy(arg->reserved, args32.reserved,
			
 
				+		       sizeof(args32.reserved));
			
 
				+#else
			
 
				+		return -ENOTTY;
			
 
				+#endif
			
 
				+	} else {
			
 
				+		arg = memdup_user(argp, sizeof(*arg));
			
 
				+		if (IS_ERR(arg))
			
 
				+			return PTR_ERR(arg);
			
 
				+	}
			
 
				+	ret = btrfs_ioctl_send(file, arg);
			
 
				+	kfree(arg);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 long btrfs_ioctl(struct file *file, unsigned int
			
 
				 		cmd, unsigned long arg)
			
 
				 {
			
@@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int
 
				 	case BTRFS_IOC_INO_PATHS:
			
 
				 		return btrfs_ioctl_ino_to_path(root, argp);
			
 
				 	case BTRFS_IOC_LOGICAL_INO:
			
 
				-		return btrfs_ioctl_logical_to_ino(fs_info, argp);
			
 
				+		return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
			
 
				+	case BTRFS_IOC_LOGICAL_INO_V2:
			
 
				+		return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
			
 
				 	case BTRFS_IOC_SPACE_INFO:
			
 
				 		return btrfs_ioctl_space_info(fs_info, argp);
			
 
				 	case BTRFS_IOC_SYNC: {
			
@@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int
 
				 		return btrfs_ioctl_set_received_subvol_32(file, argp);
			
 
				 #endif
			
 
				 	case BTRFS_IOC_SEND:
			
 
				-		return btrfs_ioctl_send(file, argp);
			
 
				+		return _btrfs_ioctl_send(file, argp, false);
			
 
				+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
			
 
				+	case BTRFS_IOC_SEND_32:
			
 
				+		return _btrfs_ioctl_send(file, argp, true);
			
 
				+#endif
			
 
				 	case BTRFS_IOC_GET_DEV_STATS:
			
 
				 		return btrfs_ioctl_get_dev_stats(fs_info, argp);
			
 
				 	case BTRFS_IOC_QUOTA_CTL:
			
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -430,10 +430,15 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static void lzo_set_level(struct list_head *ws, unsigned int type)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				 const struct btrfs_compress_op btrfs_lzo_compress = {
			
 
				 	.alloc_workspace	= lzo_alloc_workspace,
			
 
				 	.free_workspace		= lzo_free_workspace,
			
 
				 	.compress_pages		= lzo_compress_pages,
			
 
				 	.decompress_bio		= lzo_decompress_bio,
			
 
				 	.decompress		= lzo_decompress,
			
 
				+	.set_level		= lzo_set_level,
			
 
				 };
			
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 
				 	}
			
 
				 	spin_unlock(&root->ordered_extent_lock);
			
 
				 
			
 
				+	/*
			
 
				+	 * We don't need the count_max_extents here, we can assume that all of
			
 
				+	 * that work has been done at higher layers, so this is truly the
			
 
				+	 * smallest the extent is going to get.
			
 
				+	 */
			
 
				+	spin_lock(&BTRFS_I(inode)->lock);
			
 
				+	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
			
 
				+	spin_unlock(&BTRFS_I(inode)->lock);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
			
 
				 	struct btrfs_ordered_inode_tree *tree;
			
 
				-	struct btrfs_root *root = BTRFS_I(inode)->root;
			
 
				+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
			
 
				+	struct btrfs_root *root = btrfs_inode->root;
			
 
				 	struct rb_node *node;
			
 
				 	bool dec_pending_ordered = false;
			
 
				 
			
 
				-	tree = &BTRFS_I(inode)->ordered_tree;
			
 
				+	/* This is paired with btrfs_add_ordered_extent. */
			
 
				+	spin_lock(&btrfs_inode->lock);
			
 
				+	btrfs_mod_outstanding_extents(btrfs_inode, -1);
			
 
				+	spin_unlock(&btrfs_inode->lock);
			
 
				+	if (root != fs_info->tree_root)
			
 
				+		btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
			
 
				+
			
 
				+	tree = &btrfs_inode->ordered_tree;
			
 
				 	spin_lock_irq(&tree->lock);
			
 
				 	node = &entry->rb_node;
			
 
				 	rb_erase(node, &tree->tree);
			
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1441,7 +1441,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
 
				 	u64 bytenr = qrecord->bytenr;
			
 
				 	int ret;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
			
 
				+	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
			
 
				 	if (ret < 0)
			
 
				 		return ret;
			
 
				 
			
@@ -2031,7 +2031,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
 
				 				/* Search commit root to find old_roots */
			
 
				 				ret = btrfs_find_all_roots(NULL, fs_info,
			
 
				 						record->bytenr, 0,
			
 
				-						&record->old_roots);
			
 
				+						&record->old_roots, false);
			
 
				 				if (ret < 0)
			
 
				 					goto cleanup;
			
 
				 			}
			
@@ -2042,7 +2042,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
 
				 			 * root. It's safe inside commit_transaction().
			
 
				 			 */
			
 
				 			ret = btrfs_find_all_roots(trans, fs_info,
			
 
				-					record->bytenr, SEQ_LAST, &new_roots);
			
 
				+				record->bytenr, SEQ_LAST, &new_roots, false);
			
 
				 			if (ret < 0)
			
 
				 				goto cleanup;
			
 
				 			if (qgroup_to_skip) {
			
@@ -2570,7 +2570,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 
				 			num_bytes = found.offset;
			
 
				 
			
 
				 		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
			
 
				-					   &roots);
			
 
				+					   &roots, false);
			
 
				 		if (ret < 0)
			
 
				 			goto out;
			
 
				 		/* For rescan, just pass old_roots as NULL */
			
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1326,6 +1326,9 @@ write_data:
 
				 
			
 
				 cleanup:
			
 
				 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
			
 
				+
			
 
				+	while ((bio = bio_list_pop(&bio_list)))
			
 
				+		bio_put(bio);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1582,6 +1585,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 
				 
			
 
				 cleanup:
			
 
				 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
			
 
				+
			
 
				+	while ((bio = bio_list_pop(&bio_list)))
			
 
				+		bio_put(bio);
			
 
				+
			
 
				 	return -EIO;
			
 
				 
			
 
				 finish:
			
@@ -2107,6 +2114,10 @@ cleanup:
 
				 	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
			
 
				 	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
			
 
				 		rbio_orig_end_io(rbio, BLK_STS_IOERR);
			
 
				+
			
 
				+	while ((bio = bio_list_pop(&bio_list)))
			
 
				+		bio_put(bio);
			
 
				+
			
 
				 	return -EIO;
			
 
				 }
			
 
				 
			
@@ -2231,12 +2242,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
				 	ASSERT(!bio->bi_iter.bi_size);
			
 
				 	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
			
 
				 
			
 
				-	for (i = 0; i < rbio->real_stripes; i++) {
			
 
				+	/*
			
 
				+	 * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
			
 
				+	 * to the end position, so this search can start from the first parity
			
 
				+	 * stripe.
			
 
				+	 */
			
 
				+	for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
			
 
				 		if (bbio->stripes[i].dev == scrub_dev) {
			
 
				 			rbio->scrubp = i;
			
 
				 			break;
			
 
				 		}
			
 
				 	}
			
 
				+	ASSERT(i < rbio->real_stripes);
			
 
				 
			
 
				 	/* Now we just support the sectorsize equals to page size */
			
 
				 	ASSERT(fs_info->sectorsize == PAGE_SIZE);
			
@@ -2454,6 +2471,9 @@ submit_write:
 
				 
			
 
				 cleanup:
			
 
				 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
			
 
				+
			
 
				+	while ((bio = bio_list_pop(&bio_list)))
			
 
				+		bio_put(bio);
			
 
				 }
			
 
				 
			
 
				 static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
			
@@ -2563,12 +2583,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 
				 	int stripe;
			
 
				 	struct bio *bio;
			
 
				 
			
 
				+	bio_list_init(&bio_list);
			
 
				+
			
 
				 	ret = alloc_rbio_essential_pages(rbio);
			
 
				 	if (ret)
			
 
				 		goto cleanup;
			
 
				 
			
 
				-	bio_list_init(&bio_list);
			
 
				-
			
 
				 	atomic_set(&rbio->error, 0);
			
 
				 	/*
			
 
				 	 * build a list of bios to read all the missing parts of this
			
@@ -2636,6 +2656,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 
				 
			
 
				 cleanup:
			
 
				 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
			
 
				+
			
 
				+	while ((bio = bio_list_pop(&bio_list)))
			
 
				+		bio_put(bio);
			
 
				+
			
 
				 	return;
			
 
				 
			
 
				 finish:
			
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -0,0 +1,1031 @@
 
				+/*
			
 
				+ * Copyright (C) 2014 Facebook.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License v2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program; if not, write to the
			
 
				+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				+ * Boston, MA 02111-1307, USA.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/sched.h>
			
 
				+#include <linux/stacktrace.h>
			
 
				+#include "ctree.h"
			
 
				+#include "disk-io.h"
			
 
				+#include "locking.h"
			
 
				+#include "delayed-ref.h"
			
 
				+#include "ref-verify.h"
			
 
				+
			
 
				+/*
			
 
				+ * Used to keep track of the roots and number of refs each root has for a given
			
 
				+ * bytenr.  This just tracks the number of direct references, no shared
			
 
				+ * references.
			
 
				+ */
			
 
				+struct root_entry {
			
 
				+	u64 root_objectid;
			
 
				+	u64 num_refs;
			
 
				+	struct rb_node node;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * These are meant to represent what should exist in the extent tree, these can
			
 
				+ * be used to verify the extent tree is consistent as these should all match
			
 
				+ * what the extent tree says.
			
 
				+ */
			
 
				+struct ref_entry {
			
 
				+	u64 root_objectid;
			
 
				+	u64 parent;
			
 
				+	u64 owner;
			
 
				+	u64 offset;
			
 
				+	u64 num_refs;
			
 
				+	struct rb_node node;
			
 
				+};
			
 
				+
			
 
				+#define MAX_TRACE	16
			
 
				+
			
 
				+/*
			
 
				+ * Whenever we add/remove a reference we record the action.  The action maps
			
 
				+ * back to the delayed ref action.  We hold the ref we are changing in the
			
 
				+ * action so we can account for the history properly, and we record the root we
			
 
				+ * were called with since it could be different from ref_root.  We also store
			
 
				+ * stack traces because that's how I roll.
			
 
				+ */
			
 
				+struct ref_action {
			
 
				+	int action;
			
 
				+	u64 root;
			
 
				+	struct ref_entry ref;
			
 
				+	struct list_head list;
			
 
				+	unsigned long trace[MAX_TRACE];
			
 
				+	unsigned int trace_len;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * One of these for every block we reference, it holds the roots and references
			
 
				+ * to it as well as all of the ref actions that have occurred to it.  We never
			
 
				+ * free it until we unmount the file system in order to make sure re-allocations
			
 
				+ * are happening properly.
			
 
				+ */
			
 
				+struct block_entry {
			
 
				+	u64 bytenr;
			
 
				+	u64 len;
			
 
				+	u64 num_refs;
			
 
				+	int metadata;
			
 
				+	int from_disk;
			
 
				+	struct rb_root roots;
			
 
				+	struct rb_root refs;
			
 
				+	struct rb_node node;
			
 
				+	struct list_head actions;
			
 
				+};
			
 
				+
			
 
				+static struct block_entry *insert_block_entry(struct rb_root *root,
			
 
				+					      struct block_entry *be)
			
 
				+{
			
 
				+	struct rb_node **p = &root->rb_node;
			
 
				+	struct rb_node *parent_node = NULL;
			
 
				+	struct block_entry *entry;
			
 
				+
			
 
				+	while (*p) {
			
 
				+		parent_node = *p;
			
 
				+		entry = rb_entry(parent_node, struct block_entry, node);
			
 
				+		if (entry->bytenr > be->bytenr)
			
 
				+			p = &(*p)->rb_left;
			
 
				+		else if (entry->bytenr < be->bytenr)
			
 
				+			p = &(*p)->rb_right;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+
			
 
				+	rb_link_node(&be->node, parent_node, p);
			
 
				+	rb_insert_color(&be->node, root);
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
			
 
				+{
			
 
				+	struct rb_node *n;
			
 
				+	struct block_entry *entry = NULL;
			
 
				+
			
 
				+	n = root->rb_node;
			
 
				+	while (n) {
			
 
				+		entry = rb_entry(n, struct block_entry, node);
			
 
				+		if (entry->bytenr < bytenr)
			
 
				+			n = n->rb_right;
			
 
				+		else if (entry->bytenr > bytenr)
			
 
				+			n = n->rb_left;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static struct root_entry *insert_root_entry(struct rb_root *root,
			
 
				+					    struct root_entry *re)
			
 
				+{
			
 
				+	struct rb_node **p = &root->rb_node;
			
 
				+	struct rb_node *parent_node = NULL;
			
 
				+	struct root_entry *entry;
			
 
				+
			
 
				+	while (*p) {
			
 
				+		parent_node = *p;
			
 
				+		entry = rb_entry(parent_node, struct root_entry, node);
			
 
				+		if (entry->root_objectid > re->root_objectid)
			
 
				+			p = &(*p)->rb_left;
			
 
				+		else if (entry->root_objectid < re->root_objectid)
			
 
				+			p = &(*p)->rb_right;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+
			
 
				+	rb_link_node(&re->node, parent_node, p);
			
 
				+	rb_insert_color(&re->node, root);
			
 
				+	return NULL;
			
 
				+
			
 
				+}
			
 
				+
			
 
				+static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
			
 
				+{
			
 
				+	if (ref1->root_objectid < ref2->root_objectid)
			
 
				+		return -1;
			
 
				+	if (ref1->root_objectid > ref2->root_objectid)
			
 
				+		return 1;
			
 
				+	if (ref1->parent < ref2->parent)
			
 
				+		return -1;
			
 
				+	if (ref1->parent > ref2->parent)
			
 
				+		return 1;
			
 
				+	if (ref1->owner < ref2->owner)
			
 
				+		return -1;
			
 
				+	if (ref1->owner > ref2->owner)
			
 
				+		return 1;
			
 
				+	if (ref1->offset < ref2->offset)
			
 
				+		return -1;
			
 
				+	if (ref1->offset > ref2->offset)
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static struct ref_entry *insert_ref_entry(struct rb_root *root,
			
 
				+					  struct ref_entry *ref)
			
 
				+{
			
 
				+	struct rb_node **p = &root->rb_node;
			
 
				+	struct rb_node *parent_node = NULL;
			
 
				+	struct ref_entry *entry;
			
 
				+	int cmp;
			
 
				+
			
 
				+	while (*p) {
			
 
				+		parent_node = *p;
			
 
				+		entry = rb_entry(parent_node, struct ref_entry, node);
			
 
				+		cmp = comp_refs(entry, ref);
			
 
				+		if (cmp > 0)
			
 
				+			p = &(*p)->rb_left;
			
 
				+		else if (cmp < 0)
			
 
				+			p = &(*p)->rb_right;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+
			
 
				+	rb_link_node(&ref->node, parent_node, p);
			
 
				+	rb_insert_color(&ref->node, root);
			
 
				+	return NULL;
			
 
				+
			
 
				+}
			
 
				+
			
 
				+static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
			
 
				+{
			
 
				+	struct rb_node *n;
			
 
				+	struct root_entry *entry = NULL;
			
 
				+
			
 
				+	n = root->rb_node;
			
 
				+	while (n) {
			
 
				+		entry = rb_entry(n, struct root_entry, node);
			
 
				+		if (entry->root_objectid < objectid)
			
 
				+			n = n->rb_right;
			
 
				+		else if (entry->root_objectid > objectid)
			
 
				+			n = n->rb_left;
			
 
				+		else
			
 
				+			return entry;
			
 
				+	}
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+#ifdef CONFIG_STACKTRACE
			
 
				+static void __save_stack_trace(struct ref_action *ra)
			
 
				+{
			
 
				+	struct stack_trace stack_trace;
			
 
				+
			
 
				+	stack_trace.max_entries = MAX_TRACE;
			
 
				+	stack_trace.nr_entries = 0;
			
 
				+	stack_trace.entries = ra->trace;
			
 
				+	stack_trace.skip = 2;
			
 
				+	save_stack_trace(&stack_trace);
			
 
				+	ra->trace_len = stack_trace.nr_entries;
			
 
				+}
			
 
				+
			
 
				+static void __print_stack_trace(struct btrfs_fs_info *fs_info,
			
 
				+				struct ref_action *ra)
			
 
				+{
			
 
				+	struct stack_trace trace;
			
 
				+
			
 
				+	if (ra->trace_len == 0) {
			
 
				+		btrfs_err(fs_info, "  ref-verify: no stacktrace");
			
 
				+		return;
			
 
				+	}
			
 
				+	trace.nr_entries = ra->trace_len;
			
 
				+	trace.entries = ra->trace;
			
 
				+	print_stack_trace(&trace, 2);
			
 
				+}
			
 
				+#else
			
 
				+static void inline __save_stack_trace(struct ref_action *ra)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
			
 
				+				       struct ref_action *ra)
			
 
				+{
			
 
				+	btrfs_err(fs_info, "  ref-verify: no stacktrace support");
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+static void free_block_entry(struct block_entry *be)
			
 
				+{
			
 
				+	struct root_entry *re;
			
 
				+	struct ref_entry *ref;
			
 
				+	struct ref_action *ra;
			
 
				+	struct rb_node *n;
			
 
				+
			
 
				+	while ((n = rb_first(&be->roots))) {
			
 
				+		re = rb_entry(n, struct root_entry, node);
			
 
				+		rb_erase(&re->node, &be->roots);
			
 
				+		kfree(re);
			
 
				+	}
			
 
				+
			
 
				+	while((n = rb_first(&be->refs))) {
			
 
				+		ref = rb_entry(n, struct ref_entry, node);
			
 
				+		rb_erase(&ref->node, &be->refs);
			
 
				+		kfree(ref);
			
 
				+	}
			
 
				+
			
 
				+	while (!list_empty(&be->actions)) {
			
 
				+		ra = list_first_entry(&be->actions, struct ref_action,
			
 
				+				      list);
			
 
				+		list_del(&ra->list);
			
 
				+		kfree(ra);
			
 
				+	}
			
 
				+	kfree(be);
			
 
				+}
			
 
				+
			
 
				+static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
			
 
				+					   u64 bytenr, u64 len,
			
 
				+					   u64 root_objectid)
			
 
				+{
			
 
				+	struct block_entry *be = NULL, *exist;
			
 
				+	struct root_entry *re = NULL;
			
 
				+
			
 
				+	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
			
 
				+	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
			
 
				+	if (!be || !re) {
			
 
				+		kfree(re);
			
 
				+		kfree(be);
			
 
				+		return ERR_PTR(-ENOMEM);
			
 
				+	}
			
 
				+	be->bytenr = bytenr;
			
 
				+	be->len = len;
			
 
				+
			
 
				+	re->root_objectid = root_objectid;
			
 
				+	re->num_refs = 0;
			
 
				+
			
 
				+	spin_lock(&fs_info->ref_verify_lock);
			
 
				+	exist = insert_block_entry(&fs_info->block_tree, be);
			
 
				+	if (exist) {
			
 
				+		if (root_objectid) {
			
 
				+			struct root_entry *exist_re;
			
 
				+
			
 
				+			exist_re = insert_root_entry(&exist->roots, re);
			
 
				+			if (exist_re)
			
 
				+				kfree(re);
			
 
				+		}
			
 
				+		kfree(be);
			
 
				+		return exist;
			
 
				+	}
			
 
				+
			
 
				+	be->num_refs = 0;
			
 
				+	be->metadata = 0;
			
 
				+	be->from_disk = 0;
			
 
				+	be->roots = RB_ROOT;
			
 
				+	be->refs = RB_ROOT;
			
 
				+	INIT_LIST_HEAD(&be->actions);
			
 
				+	if (root_objectid)
			
 
				+		insert_root_entry(&be->roots, re);
			
 
				+	else
			
 
				+		kfree(re);
			
 
				+	return be;
			
 
				+}
			
 
				+
			
 
				+static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			
 
				+			  u64 parent, u64 bytenr, int level)
			
 
				+{
			
 
				+	struct block_entry *be;
			
 
				+	struct root_entry *re;
			
 
				+	struct ref_entry *ref = NULL, *exist;
			
 
				+
			
 
				+	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
			
 
				+	if (!ref)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	if (parent)
			
 
				+		ref->root_objectid = 0;
			
 
				+	else
			
 
				+		ref->root_objectid = ref_root;
			
 
				+	ref->parent = parent;
			
 
				+	ref->owner = level;
			
 
				+	ref->offset = 0;
			
 
				+	ref->num_refs = 1;
			
 
				+
			
 
				+	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
			
 
				+	if (IS_ERR(be)) {
			
 
				+		kfree(ref);
			
 
				+		return PTR_ERR(be);
			
 
				+	}
			
 
				+	be->num_refs++;
			
 
				+	be->from_disk = 1;
			
 
				+	be->metadata = 1;
			
 
				+
			
 
				+	if (!parent) {
			
 
				+		ASSERT(ref_root);
			
 
				+		re = lookup_root_entry(&be->roots, ref_root);
			
 
				+		ASSERT(re);
			
 
				+		re->num_refs++;
			
 
				+	}
			
 
				+	exist = insert_ref_entry(&be->refs, ref);
			
 
				+	if (exist) {
			
 
				+		exist->num_refs++;
			
 
				+		kfree(ref);
			
 
				+	}
			
 
				+	spin_unlock(&fs_info->ref_verify_lock);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			
 
				+			       u64 parent, u32 num_refs, u64 bytenr,
			
 
				+			       u64 num_bytes)
			
 
				+{
			
 
				+	struct block_entry *be;
			
 
				+	struct ref_entry *ref;
			
 
				+
			
 
				+	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
			
 
				+	if (!ref)
			
 
				+		return -ENOMEM;
			
 
				+	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
			
 
				+	if (IS_ERR(be)) {
			
 
				+		kfree(ref);
			
 
				+		return PTR_ERR(be);
			
 
				+	}
			
 
				+	be->num_refs += num_refs;
			
 
				+
			
 
				+	ref->parent = parent;
			
 
				+	ref->num_refs = num_refs;
			
 
				+	if (insert_ref_entry(&be->refs, ref)) {
			
 
				+		spin_unlock(&fs_info->ref_verify_lock);
			
 
				+		btrfs_err(fs_info, "existing shared ref when reading from disk?");
			
 
				+		kfree(ref);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+	spin_unlock(&fs_info->ref_verify_lock);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			
 
				+			       struct extent_buffer *leaf,
			
 
				+			       struct btrfs_extent_data_ref *dref,
			
 
				+			       u64 bytenr, u64 num_bytes)
			
 
				+{
			
 
				+	struct block_entry *be;
			
 
				+	struct ref_entry *ref;
			
 
				+	struct root_entry *re;
			
 
				+	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
			
 
				+	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
			
 
				+	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
			
 
				+	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
			
 
				+
			
 
				+	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
			
 
				+	if (!ref)
			
 
				+		return -ENOMEM;
			
 
				+	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
			
 
				+	if (IS_ERR(be)) {
			
 
				+		kfree(ref);
			
 
				+		return PTR_ERR(be);
			
 
				+	}
			
 
				+	be->num_refs += num_refs;
			
 
				+
			
 
				+	ref->parent = 0;
			
 
				+	ref->owner = owner;
			
 
				+	ref->root_objectid = ref_root;
			
 
				+	ref->offset = offset;
			
 
				+	ref->num_refs = num_refs;
			
 
				+	if (insert_ref_entry(&be->refs, ref)) {
			
 
				+		spin_unlock(&fs_info->ref_verify_lock);
			
 
				+		btrfs_err(fs_info, "existing ref when reading from disk?");
			
 
				+		kfree(ref);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	re = lookup_root_entry(&be->roots, ref_root);
			
 
				+	if (!re) {
			
 
				+		spin_unlock(&fs_info->ref_verify_lock);
			
 
				+		btrfs_err(fs_info, "missing root in new block entry?");
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+	re->num_refs += num_refs;
			
 
				+	spin_unlock(&fs_info->ref_verify_lock);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int process_extent_item(struct btrfs_fs_info *fs_info,
			
 
				+			       struct btrfs_path *path, struct btrfs_key *key,
			
 
				+			       int slot, int *tree_block_level)
			
 
				+{
			
 
				+	struct btrfs_extent_item *ei;
			
 
				+	struct btrfs_extent_inline_ref *iref;
			
 
				+	struct btrfs_extent_data_ref *dref;
			
 
				+	struct btrfs_shared_data_ref *sref;
			
 
				+	struct extent_buffer *leaf = path->nodes[0];
			
 
				+	u32 item_size = btrfs_item_size_nr(leaf, slot);
			
 
				+	unsigned long end, ptr;
			
 
				+	u64 offset, flags, count;
			
 
				+	int type, ret;
			
 
				+
			
 
				+	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
			
 
				+	flags = btrfs_extent_flags(leaf, ei);
			
 
				+
			
 
				+	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
			
 
				+	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			
 
				+		struct btrfs_tree_block_info *info;
			
 
				+
			
 
				+		info = (struct btrfs_tree_block_info *)(ei + 1);
			
 
				+		*tree_block_level = btrfs_tree_block_level(leaf, info);
			
 
				+		iref = (struct btrfs_extent_inline_ref *)(info + 1);
			
 
				+	} else {
			
 
				+		if (key->type == BTRFS_METADATA_ITEM_KEY)
			
 
				+			*tree_block_level = key->offset;
			
 
				+		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			
 
				+	}
			
 
				+
			
 
				+	ptr = (unsigned long)iref;
			
 
				+	end = (unsigned long)ei + item_size;
			
 
				+	while (ptr < end) {
			
 
				+		iref = (struct btrfs_extent_inline_ref *)ptr;
			
 
				+		type = btrfs_extent_inline_ref_type(leaf, iref);
			
 
				+		offset = btrfs_extent_inline_ref_offset(leaf, iref);
			
 
				+		switch (type) {
			
 
				+		case BTRFS_TREE_BLOCK_REF_KEY:
			
 
				+			ret = add_tree_block(fs_info, offset, 0, key->objectid,
			
 
				+					     *tree_block_level);
			
 
				+			break;
			
 
				+		case BTRFS_SHARED_BLOCK_REF_KEY:
			
 
				+			ret = add_tree_block(fs_info, 0, offset, key->objectid,
			
 
				+					     *tree_block_level);
			
 
				+			break;
			
 
				+		case BTRFS_EXTENT_DATA_REF_KEY:
			
 
				+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			
 
				+			ret = add_extent_data_ref(fs_info, leaf, dref,
			
 
				+						  key->objectid, key->offset);
			
 
				+			break;
			
 
				+		case BTRFS_SHARED_DATA_REF_KEY:
			
 
				+			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			
 
				+			count = btrfs_shared_data_ref_count(leaf, sref);
			
 
				+			ret = add_shared_data_ref(fs_info, offset, count,
			
 
				+						  key->objectid, key->offset);
			
 
				+			break;
			
 
				+		default:
			
 
				+			btrfs_err(fs_info, "invalid key type in iref");
			
 
				+			ret = -EINVAL;
			
 
				+			break;
			
 
				+		}
			
 
				+		if (ret)
			
 
				+			break;
			
 
				+		ptr += btrfs_extent_inline_ref_size(type);
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static int process_leaf(struct btrfs_root *root,
			
 
				+			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
			
 
				+{
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				+	struct extent_buffer *leaf = path->nodes[0];
			
 
				+	struct btrfs_extent_data_ref *dref;
			
 
				+	struct btrfs_shared_data_ref *sref;
			
 
				+	u32 count;
			
 
				+	int i = 0, tree_block_level = 0, ret;
			
 
				+	struct btrfs_key key;
			
 
				+	int nritems = btrfs_header_nritems(leaf);
			
 
				+
			
 
				+	for (i = 0; i < nritems; i++) {
			
 
				+		btrfs_item_key_to_cpu(leaf, &key, i);
			
 
				+		switch (key.type) {
			
 
				+		case BTRFS_EXTENT_ITEM_KEY:
			
 
				+			*num_bytes = key.offset;
			
 
				+		case BTRFS_METADATA_ITEM_KEY:
			
 
				+			*bytenr = key.objectid;
			
 
				+			ret = process_extent_item(fs_info, path, &key, i,
			
 
				+						  &tree_block_level);
			
 
				+			break;
			
 
				+		case BTRFS_TREE_BLOCK_REF_KEY:
			
 
				+			ret = add_tree_block(fs_info, key.offset, 0,
			
 
				+					     key.objectid, tree_block_level);
			
 
				+			break;
			
 
				+		case BTRFS_SHARED_BLOCK_REF_KEY:
			
 
				+			ret = add_tree_block(fs_info, 0, key.offset,
			
 
				+					     key.objectid, tree_block_level);
			
 
				+			break;
			
 
				+		case BTRFS_EXTENT_DATA_REF_KEY:
			
 
				+			dref = btrfs_item_ptr(leaf, i,
			
 
				+					      struct btrfs_extent_data_ref);
			
 
				+			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
			
 
				+						  *num_bytes);
			
 
				+			break;
			
 
				+		case BTRFS_SHARED_DATA_REF_KEY:
			
 
				+			sref = btrfs_item_ptr(leaf, i,
			
 
				+					      struct btrfs_shared_data_ref);
			
 
				+			count = btrfs_shared_data_ref_count(leaf, sref);
			
 
				+			ret = add_shared_data_ref(fs_info, key.offset, count,
			
 
				+						  *bytenr, *num_bytes);
			
 
				+			break;
			
 
				+		default:
			
 
				+			break;
			
 
				+		}
			
 
				+		if (ret)
			
 
				+			break;
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* Walk down to the leaf from the given level */
			
 
				+static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			
 
				+			  int level, u64 *bytenr, u64 *num_bytes)
			
 
				+{
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				+	struct extent_buffer *eb;
			
 
				+	u64 block_bytenr, gen;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	while (level >= 0) {
			
 
				+		if (level) {
			
 
				+			block_bytenr = btrfs_node_blockptr(path->nodes[level],
			
 
				+							   path->slots[level]);
			
 
				+			gen = btrfs_node_ptr_generation(path->nodes[level],
			
 
				+							path->slots[level]);
			
 
				+			eb = read_tree_block(fs_info, block_bytenr, gen);
			
 
				+			if (IS_ERR(eb))
			
 
				+				return PTR_ERR(eb);
			
 
				+			if (!extent_buffer_uptodate(eb)) {
			
 
				+				free_extent_buffer(eb);
			
 
				+				return -EIO;
			
 
				+			}
			
 
				+			btrfs_tree_read_lock(eb);
			
 
				+			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			
 
				+			path->nodes[level-1] = eb;
			
 
				+			path->slots[level-1] = 0;
			
 
				+			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
			
 
				+		} else {
			
 
				+			ret = process_leaf(root, path, bytenr, num_bytes);
			
 
				+			if (ret)
			
 
				+				break;
			
 
				+		}
			
 
				+		level--;
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* Walk up to the next node that needs to be processed */
			
 
				+static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			
 
				+			int *level)
			
 
				+{
			
 
				+	int l;
			
 
				+
			
 
				+	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
			
 
				+		if (!path->nodes[l])
			
 
				+			continue;
			
 
				+		if (l) {
			
 
				+			path->slots[l]++;
			
 
				+			if (path->slots[l] <
			
 
				+			    btrfs_header_nritems(path->nodes[l])) {
			
 
				+				*level = l;
			
 
				+				return 0;
			
 
				+			}
			
 
				+		}
			
 
				+		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
			
 
				+		free_extent_buffer(path->nodes[l]);
			
 
				+		path->nodes[l] = NULL;
			
 
				+		path->slots[l] = 0;
			
 
				+		path->locks[l] = 0;
			
 
				+	}
			
 
				+
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+static void dump_ref_action(struct btrfs_fs_info *fs_info,
			
 
				+			    struct ref_action *ra)
			
 
				+{
			
 
				+	btrfs_err(fs_info,
			
 
				+"  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			
 
				+		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
			
 
				+		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
			
 
				+	__print_stack_trace(fs_info, ra);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Dumps all the information from the block entry to printk, it's going to be
			
 
				+ * awesome.
			
 
				+ */
			
 
				+static void dump_block_entry(struct btrfs_fs_info *fs_info,
			
 
				+			     struct block_entry *be)
			
 
				+{
			
 
				+	struct ref_entry *ref;
			
 
				+	struct root_entry *re;
			
 
				+	struct ref_action *ra;
			
 
				+	struct rb_node *n;
			
 
				+
			
 
				+	btrfs_err(fs_info,
			
 
				+"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
			
 
				+		  be->bytenr, be->len, be->num_refs, be->metadata,
			
 
				+		  be->from_disk);
			
 
				+
			
 
				+	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
			
 
				+		ref = rb_entry(n, struct ref_entry, node);
			
 
				+		btrfs_err(fs_info,
			
 
				+"  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			
 
				+			  ref->root_objectid, ref->parent, ref->owner,
			
 
				+			  ref->offset, ref->num_refs);
			
 
				+	}
			
 
				+
			
 
				+	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
			
 
				+		re = rb_entry(n, struct root_entry, node);
			
 
				+		btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
			
 
				+			  re->root_objectid, re->num_refs);
			
 
				+	}
			
 
				+
			
 
				+	list_for_each_entry(ra, &be->actions, list)
			
 
				+		dump_ref_action(fs_info, ra);
			
 
				+}
			
 
				+
			
 
				+/*
				+ * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
				+ * @root: the root we are making this modification from.
				+ * @bytenr: the bytenr we are modifying.
				+ * @num_bytes: number of bytes.
				+ * @parent: the parent bytenr.
				+ * @ref_root: the original root owner of the bytenr.
				+ * @owner: level in the case of metadata, inode in the case of data.
				+ * @offset: 0 for metadata, file offset for data.
				+ * @action: the action that we are doing, this is the same as the delayed ref
				+ *	action.
				+ *
				+ * This will add an action item to the given bytenr and do sanity checks to make
				+ * sure we haven't messed something up.  If we are making a new allocation and
				+ * this block entry has history we will delete all previous actions as long as
				+ * our sanity checks pass as they are no longer needed.
				+ *
				+ * Returns 0 on success or when REF_VERIFY is not enabled, -ENOMEM if an
				+ * allocation fails, the error from add_block_entry(), or -EINVAL when a sanity
				+ * check trips.  On any failure the ref cache is freed and REF_VERIFY is
				+ * cleared so that no further verification is attempted.
				+ */
				+int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
				+		       u64 parent, u64 ref_root, u64 owner, u64 offset,
				+		       int action)
				+{
				+	struct btrfs_fs_info *fs_info = root->fs_info;
				+	struct ref_entry *ref = NULL, *exist;
				+	struct ref_action *ra = NULL;
				+	struct block_entry *be = NULL;
				+	struct root_entry *re = NULL;
				+	int ret = 0;
				+	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
				+
				+	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				+		return 0;
				+
				+	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
				+	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
				+	if (!ra || !ref) {
				+		kfree(ref);
				+		kfree(ra);
				+		ret = -ENOMEM;
				+		goto out;
				+	}
				+
				+	if (parent) {
				+		ref->parent = parent;
				+	} else {
				+		ref->root_objectid = ref_root;
				+		ref->owner = owner;
				+		ref->offset = offset;
				+	}
				+	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
				+
				+	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
				+	/*
				+	 * Save the extra info from the delayed ref in the ref action to make it
				+	 * easier to figure out what is happening.  The real ref's we add to the
				+	 * ref tree need to reflect what we save on disk so it matches any
				+	 * on-disk refs we pre-loaded.
				+	 */
				+	ra->ref.owner = owner;
				+	ra->ref.offset = offset;
				+	ra->ref.root_objectid = ref_root;
				+	__save_stack_trace(ra);
				+
				+	INIT_LIST_HEAD(&ra->list);
				+	ra->action = action;
				+	ra->root = root->objectid;
				+
				+	/*
				+	 * This is an allocation, preallocate the block_entry in case we haven't
				+	 * used it before.
				+	 */
				+	ret = -EINVAL;
				+	if (action == BTRFS_ADD_DELAYED_EXTENT) {
				+		/*
				+		 * For subvol_create we'll just pass in whatever the parent root
				+		 * is and the new root objectid, so let's not treat the passed
				+		 * in root as if it really has a ref for this bytenr.
				+		 *
				+		 * NOTE(review): add_block_entry() presumably returns with
				+		 * ref_verify_lock held on success and released on error; the
				+		 * goto targets below rely on that - confirm against its
				+		 * definition.
				+		 */
				+		be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
				+		if (IS_ERR(be)) {
				+			/* Neither object was handed to anyone yet. */
				+			kfree(ref);
				+			kfree(ra);
				+			ret = PTR_ERR(be);
				+			goto out;
				+		}
				+		be->num_refs++;
				+		if (metadata)
				+			be->metadata = 1;
				+
				+		if (be->num_refs != 1) {
				+			btrfs_err(fs_info,
				+			"re-allocated a block that still has references to it!");
				+			dump_block_entry(fs_info, be);
				+			dump_ref_action(fs_info, ra);
				+			kfree(ref);
				+			kfree(ra);
				+			goto out_unlock;
				+		}
				+
				+		while (!list_empty(&be->actions)) {
				+			struct ref_action *tmp;
				+
				+			tmp = list_first_entry(&be->actions, struct ref_action,
				+					       list);
				+			list_del(&tmp->list);
				+			kfree(tmp);
				+		}
				+	} else {
				+		struct root_entry *tmp;
				+
				+		if (!parent) {
				+			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
				+			if (!re) {
				+				kfree(ref);
				+				kfree(ra);
				+				ret = -ENOMEM;
				+				goto out;
				+			}
				+			/*
				+			 * This is the root that is modifying us, so it's the
				+			 * one we want to lookup below when we modify the
				+			 * re->num_refs.
				+			 */
				+			ref_root = root->objectid;
				+			re->root_objectid = root->objectid;
				+			re->num_refs = 0;
				+		}
				+
				+		spin_lock(&fs_info->ref_verify_lock);
				+		be = lookup_block_entry(&fs_info->block_tree, bytenr);
				+		if (!be) {
				+			btrfs_err(fs_info,
				+"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				+				  action, (unsigned long long)bytenr,
				+				  (unsigned long long)num_bytes);
				+			dump_ref_action(fs_info, ra);
				+			kfree(ref);
				+			kfree(ra);
				+			/* re was never inserted; NULL when parent is set. */
				+			kfree(re);
				+			goto out_unlock;
				+		}
				+
				+		if (!parent) {
				+			tmp = insert_root_entry(&be->roots, re);
				+			if (tmp) {
				+				kfree(re);
				+				re = tmp;
				+			}
				+		}
				+	}
				+
				+	exist = insert_ref_entry(&be->refs, ref);
				+	if (exist) {
				+		/* An equal ref is already in the tree, so ours is still private. */
				+		if (action == BTRFS_DROP_DELAYED_REF) {
				+			if (exist->num_refs == 0) {
				+				btrfs_err(fs_info,
				+"dropping a ref for a existing root that doesn't have a ref on the block");
				+				dump_block_entry(fs_info, be);
				+				dump_ref_action(fs_info, ra);
				+				kfree(ref);
				+				kfree(ra);
				+				goto out_unlock;
				+			}
				+			exist->num_refs--;
				+			if (exist->num_refs == 0) {
				+				rb_erase(&exist->node, &be->refs);
				+				kfree(exist);
				+			}
				+		} else if (!be->metadata) {
				+			exist->num_refs++;
				+		} else {
				+			btrfs_err(fs_info,
				+"attempting to add another ref for an existing ref on a tree block");
				+			dump_block_entry(fs_info, be);
				+			dump_ref_action(fs_info, ra);
				+			kfree(ref);
				+			kfree(ra);
				+			goto out_unlock;
				+		}
				+		kfree(ref);
				+	} else {
				+		if (action == BTRFS_DROP_DELAYED_REF) {
				+			btrfs_err(fs_info,
				+"dropping a ref for a root that doesn't have a ref on the block");
				+			dump_block_entry(fs_info, be);
				+			dump_ref_action(fs_info, ra);
				+			/*
				+			 * Our ref was just linked into be->refs with a bogus
				+			 * count, so unlink it before freeing it.
				+			 */
				+			rb_erase(&ref->node, &be->refs);
				+			kfree(ref);
				+			kfree(ra);
				+			goto out_unlock;
				+		}
				+	}
				+
				+	if (!parent && !re) {
				+		re = lookup_root_entry(&be->roots, ref_root);
				+		if (!re) {
				+			/*
				+			 * This shouldn't happen because we will add our re
				+			 * above when we lookup the be with !parent, but just in
				+			 * case catch this case so we don't panic because I
				+			 * didn't think of some other corner case.
				+			 */
				+			btrfs_err(fs_info, "failed to find root %llu for %llu",
				+				  root->objectid, be->bytenr);
				+			dump_block_entry(fs_info, be);
				+			dump_ref_action(fs_info, ra);
				+			kfree(ra);
				+			goto out_unlock;
				+		}
				+	}
				+	if (action == BTRFS_DROP_DELAYED_REF) {
				+		if (re)
				+			re->num_refs--;
				+		be->num_refs--;
				+	} else if (action == BTRFS_ADD_DELAYED_REF) {
				+		be->num_refs++;
				+		if (re)
				+			re->num_refs++;
				+	}
				+	list_add_tail(&ra->list, &be->actions);
				+	ret = 0;
				+out_unlock:
				+	spin_unlock(&fs_info->ref_verify_lock);
				+out:
				+	if (ret) {
				+		/*
				+		 * Free the cache while REF_VERIFY is still set:
				+		 * btrfs_free_ref_cache() returns early once the option is
				+		 * cleared, which would strand every cached entry.
				+		 */
				+		btrfs_free_ref_cache(fs_info);
				+		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
				+	}
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Drop every block entry held in the ref verify tree.  Does nothing unless the
				+ * REF_VERIFY mount option is set.
				+ */
				+void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
				+{
				+	struct rb_root *tree = &fs_info->block_tree;
				+	struct rb_node *first;
				+
				+	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				+		return;
				+
				+	spin_lock(&fs_info->ref_verify_lock);
				+	/* Keep peeling off the leftmost entry until the tree is empty. */
				+	for (first = rb_first(tree); first; first = rb_first(tree)) {
				+		struct block_entry *victim;
				+
				+		victim = rb_entry(first, struct block_entry, node);
				+		rb_erase(&victim->node, tree);
				+		free_block_entry(victim);
				+		/* Give the scheduler a chance between entries. */
				+		cond_resched_lock(&fs_info->ref_verify_lock);
				+	}
				+	spin_unlock(&fs_info->ref_verify_lock);
				+}
			
 
				+
			
 
				+/*
				+ * Walk forward from the cached block entry nearest to @start and free every
				+ * entry whose bytenr lies in [@start, @start + @len).  Entries that straddle
				+ * either edge of the range are reported with btrfs_err().  Does nothing
				+ * unless the REF_VERIFY mount option is set.
				+ */
				+void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
				+			       u64 len)
				+{
				+	struct block_entry *closest = NULL;
				+	struct rb_node *cur, *next;
				+	u64 end = start + len;
				+
				+	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				+		return;
				+
				+	spin_lock(&fs_info->ref_verify_lock);
				+
				+	cur = fs_info->block_tree.rb_node;
				+	while (cur) {
				+		struct block_entry *entry;
				+
				+		entry = rb_entry(cur, struct block_entry, node);
				+		if (entry->bytenr == start) {
				+			closest = entry;
				+			break;
				+		}
				+		cur = (entry->bytenr < start) ? cur->rb_right : cur->rb_left;
				+
				+		/* We want to get as close to start as possible */
				+		if (!closest ||
				+		    (entry->bytenr < start &&
				+		     (closest->bytenr > start ||
				+		      entry->bytenr > closest->bytenr)))
				+			closest = entry;
				+	}
				+
				+	/*
				+	 * Could have an empty block group, maybe have something to check for
				+	 * this case to verify we were actually empty?
				+	 */
				+	if (!closest)
				+		goto unlock;
				+
				+	for (cur = &closest->node; cur; cur = next) {
				+		struct block_entry *be;
				+
				+		be = rb_entry(cur, struct block_entry, node);
				+		/* Fetch the successor first, be may be erased and freed below. */
				+		next = rb_next(cur);
				+
				+		if (be->bytenr < start) {
				+			/* Starts before the range; only complain if it reaches in. */
				+			if (be->bytenr + be->len > start) {
				+				btrfs_err(fs_info,
				+				"block entry overlaps a block group [%llu,%llu]!",
				+				start, len);
				+				dump_block_entry(fs_info, be);
				+			}
				+			continue;
				+		}
				+		if (be->bytenr >= end)
				+			break;
				+		if (be->bytenr + be->len > end) {
				+			btrfs_err(fs_info,
				+				"block entry overlaps a block group [%llu,%llu]!",
				+				start, len);
				+			dump_block_entry(fs_info, be);
				+		}
				+		rb_erase(&be->node, &fs_info->block_tree);
				+		free_block_entry(be);
				+	}
				+unlock:
				+	spin_unlock(&fs_info->ref_verify_lock);
				+}
			
 
				+
			
 
				+/*
				+ * Walk the extent tree and build the ref tree, meant to be called at mount.
				+ *
				+ * Returns 0 when REF_VERIFY is not set or the walk completed, -ENOMEM if the
				+ * path could not be allocated, otherwise the non-zero result of the walk that
				+ * failed.  On failure the partially built cache is freed and REF_VERIFY is
				+ * cleared.
				+ */
				+int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
				+{
				+	struct btrfs_path *path;
				+	struct extent_buffer *eb;
				+	u64 bytenr = 0, num_bytes = 0;
				+	int ret, level;
				+
				+	if (!btrfs_test_opt(fs_info, REF_VERIFY))
				+		return 0;
				+
				+	path = btrfs_alloc_path();
				+	if (!path)
				+		return -ENOMEM;
				+
				+	eb = btrfs_read_lock_root_node(fs_info->extent_root);
				+	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
				+	level = btrfs_header_level(eb);
				+	path->nodes[level] = eb;
				+	path->slots[level] = 0;
				+	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
				+
				+	while (1) {
				+		/*
				+		 * We have to keep track of the bytenr/num_bytes we last hit
				+		 * because we could have run out of space for an inline ref, and
				+		 * would have had to add a ref key item which may appear on a
				+		 * different leaf from the original extent item.
				+		 */
				+		ret = walk_down_tree(fs_info->extent_root, path, level,
				+				     &bytenr, &num_bytes);
				+		if (ret)
				+			break;
				+		/* Walk up the same tree we just walked down. */
				+		ret = walk_up_tree(fs_info->extent_root, path, &level);
				+		if (ret < 0)
				+			break;
				+		if (ret > 0) {
				+			/* A positive return ends the walk without an error. */
				+			ret = 0;
				+			break;
				+		}
				+	}
				+	if (ret) {
				+		/*
				+		 * Free the cache before clearing the option:
				+		 * btrfs_free_ref_cache() is a no-op once REF_VERIFY is unset.
				+		 */
				+		btrfs_free_ref_cache(fs_info);
				+		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
				+	}
				+	btrfs_free_path(path);
				+	return ret;
				+}
			
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -0,0 +1,62 @@
 
				+/*
			
 
				+ * Copyright (C) 2014 Facebook.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License v2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program; if not, write to the
			
 
				+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
			
 
				+ * Boston, MA 02111-1307, USA.
			
 
				+ */
			
 
				+#ifndef __REF_VERIFY__
			
 
				+#define __REF_VERIFY__
			
 
				+
			
 
				+/* Real implementations, built only when the ref verify tool is configured in. */
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
			
 
				+void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
			
 
				+int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
			
 
				+		       u64 parent, u64 ref_root, u64 owner, u64 offset,
			
 
				+		       int action);
			
 
				+void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			
 
				+			       u64 len);
			
 
				+
			
 
				+/* Initialize the ref verify lock and start with an empty block tree. */
			
 
				+static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
			
 
				+{
			
 
				+	spin_lock_init(&fs_info->ref_verify_lock);
			
 
				+	fs_info->block_tree = RB_ROOT;
			
 
				+}
			
 
				+#else
			
 
				+/*
			
 
				+ * Stubs used when CONFIG_BTRFS_FS_REF_VERIFY is not set: each one does nothing
			
 
				+ * and, where a value is returned, returns 0.
			
 
				+ */
			
 
				+static inline int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
			
 
				+				     u64 num_bytes, u64 parent, u64 ref_root,
			
 
				+				     u64 owner, u64 offset, int action)
			
 
				+{
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static inline void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info,
			
 
				+					     u64 start, u64 len)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+#endif /* CONFIG_BTRFS_FS_REF_VERIFY */
			
 
				+#endif /* __REF_VERIFY__ */
			
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1742,7 +1742,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
 
				 		dirty = 1;
			
 
				 
			
 
				 		key.offset -= btrfs_file_extent_offset(leaf, fi);
			
 
				-		ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr,
			
 
				+		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
			
 
				 					   num_bytes, parent,
			
 
				 					   btrfs_header_owner(leaf),
			
 
				 					   key.objectid, key.offset);
			
@@ -1751,7 +1751,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
			
 
				+		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
			
 
				 					parent, btrfs_header_owner(leaf),
			
 
				 					key.objectid, key.offset);
			
 
				 		if (ret) {
			
@@ -1952,21 +1952,21 @@ again:
 
				 					      path->slots[level], old_ptr_gen);
			
 
				 		btrfs_mark_buffer_dirty(path->nodes[level]);
			
 
				 
			
 
				-		ret = btrfs_inc_extent_ref(trans, fs_info, old_bytenr,
			
 
				+		ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
			
 
				 					blocksize, path->nodes[level]->start,
			
 
				 					src->root_key.objectid, level - 1, 0);
			
 
				 		BUG_ON(ret);
			
 
				-		ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr,
			
 
				+		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
			
 
				 					blocksize, 0, dest->root_key.objectid,
			
 
				 					level - 1, 0);
			
 
				 		BUG_ON(ret);
			
 
				 
			
 
				-		ret = btrfs_free_extent(trans, fs_info, new_bytenr, blocksize,
			
 
				+		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
			
 
				 					path->nodes[level]->start,
			
 
				 					src->root_key.objectid, level - 1, 0);
			
 
				 		BUG_ON(ret);
			
 
				 
			
 
				-		ret = btrfs_free_extent(trans, fs_info, old_bytenr, blocksize,
			
 
				+		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
			
 
				 					0, dest->root_key.objectid, level - 1,
			
 
				 					0);
			
 
				 		BUG_ON(ret);
			
@@ -2808,7 +2808,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
				 						      trans->transid);
			
 
				 			btrfs_mark_buffer_dirty(upper->eb);
			
 
				 
			
 
				-			ret = btrfs_inc_extent_ref(trans, root->fs_info,
			
 
				+			ret = btrfs_inc_extent_ref(trans, root,
			
 
				 						node->eb->start, blocksize,
			
 
				 						upper->eb->start,
			
 
				 						btrfs_header_owner(upper->eb),
			
@@ -3246,6 +3246,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
 
				 				put_page(page);
			
 
				 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
			
 
				 							PAGE_SIZE);
			
 
				+				btrfs_delalloc_release_extents(BTRFS_I(inode),
			
 
				+							       PAGE_SIZE);
			
 
				 				ret = -EIO;
			
 
				 				goto out;
			
 
				 			}
			
@@ -3275,6 +3277,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
 
				 		put_page(page);
			
 
				 
			
 
				 		index++;
			
 
				+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
			
 
				 		balance_dirty_pages_ratelimited(inode->i_mapping);
			
 
				 		btrfs_throttle(fs_info);
			
 
				 	}
			
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
 
				 	struct btrfs_root *root;
			
 
				 	int err = 0;
			
 
				 	int ret;
			
 
				-	bool can_recover = true;
			
 
				-
			
 
				-	if (sb_rdonly(fs_info->sb))
			
 
				-		can_recover = false;
			
 
				 
			
 
				 	path = btrfs_alloc_path();
			
 
				 	if (!path)
			
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -231,7 +231,7 @@ struct scrub_warning {
 
				 	struct btrfs_path	*path;
			
 
				 	u64			extent_item_size;
			
 
				 	const char		*errstr;
			
 
				-	sector_t		sector;
			
 
				+	u64			physical;
			
 
				 	u64			logical;
			
 
				 	struct btrfs_device	*dev;
			
 
				 };
			
@@ -797,10 +797,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 
				 	 */
			
 
				 	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
			
 
				 		btrfs_warn_in_rcu(fs_info,
			
 
				-				  "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
			
 
				+"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
			
 
				 				  swarn->errstr, swarn->logical,
			
 
				 				  rcu_str_deref(swarn->dev->name),
			
 
				-				  (unsigned long long)swarn->sector,
			
 
				+				  swarn->physical,
			
 
				 				  root, inum, offset,
			
 
				 				  min(isize - offset, (u64)PAGE_SIZE), nlink,
			
 
				 				  (char *)(unsigned long)ipath->fspath->val[i]);
			
@@ -810,10 +810,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 
				 
			
 
				 err:
			
 
				 	btrfs_warn_in_rcu(fs_info,
			
 
				-			  "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			
 
				+			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			
 
				 			  swarn->errstr, swarn->logical,
			
 
				 			  rcu_str_deref(swarn->dev->name),
			
 
				-			  (unsigned long long)swarn->sector,
			
 
				+			  swarn->physical,
			
 
				 			  root, inum, offset, ret);
			
 
				 
			
 
				 	free_ipath(ipath);
			
@@ -845,7 +845,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 
				 	if (!path)
			
 
				 		return;
			
 
				 
			
 
				-	swarn.sector = (sblock->pagev[0]->physical) >> 9;
			
 
				+	swarn.physical = sblock->pagev[0]->physical;
			
 
				 	swarn.logical = sblock->pagev[0]->logical;
			
 
				 	swarn.errstr = errstr;
			
 
				 	swarn.dev = NULL;
			
@@ -868,10 +868,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 
				 						      item_size, &ref_root,
			
 
				 						      &ref_level);
			
 
				 			btrfs_warn_in_rcu(fs_info,
			
 
				-				"%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu",
			
 
				+"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
			
 
				 				errstr, swarn.logical,
			
 
				 				rcu_str_deref(dev->name),
			
 
				-				(unsigned long long)swarn.sector,
			
 
				+				swarn.physical,
			
 
				 				ref_level ? "node" : "leaf",
			
 
				 				ret < 0 ? -1 : ref_level,
			
 
				 				ret < 0 ? -1 : ref_root);
			
@@ -883,7 +883,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 
				 		swarn.dev = dev;
			
 
				 		iterate_extent_inodes(fs_info, found_key.objectid,
			
 
				 					extent_item_pos, 1,
			
 
				-					scrub_print_warning_inode, &swarn);
			
 
				+					scrub_print_warning_inode, &swarn, false);
			
 
				 	}
			
 
				 
			
 
				 out:
			
@@ -1047,7 +1047,7 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
 
				 	 * can be found.
			
 
				 	 */
			
 
				 	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
			
 
				-					  scrub_fixup_readpage, fixup);
			
 
				+					  scrub_fixup_readpage, fixup, false);
			
 
				 	if (ret < 0) {
			
 
				 		uncorrectable = 1;
			
 
				 		goto out;
			
@@ -4390,7 +4390,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
 
				 	}
			
 
				 
			
 
				 	ret = iterate_inodes_from_logical(logical, fs_info, path,
			
 
				-					  record_inode_for_nocow, nocow_ctx);
			
 
				+			record_inode_for_nocow, nocow_ctx, false);
			
 
				 	if (ret != 0 && ret != -ENOENT) {
			
 
				 		btrfs_warn(fs_info,
			
 
				 			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
			
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -26,6 +26,7 @@
 
				 #include <linux/radix-tree.h>
			
 
				 #include <linux/vmalloc.h>
			
 
				 #include <linux/string.h>
			
 
				+#include <linux/compat.h>
			
 
				 
			
 
				 #include "send.h"
			
 
				 #include "backref.h"
			
@@ -992,7 +993,6 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
 
				  * path must point to the dir item when called.
			
 
				  */
			
 
				 static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			
 
				-			    struct btrfs_key *found_key,
			
 
				 			    iterate_dir_item_t iterate, void *ctx)
			
 
				 {
			
 
				 	int ret = 0;
			
@@ -1271,12 +1271,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
 
				 		 */
			
 
				 		if (ino >= bctx->cur_objectid)
			
 
				 			return 0;
			
 
				-#if 0
			
 
				-		if (ino > bctx->cur_objectid)
			
 
				-			return 0;
			
 
				-		if (offset + bctx->extent_len > bctx->cur_offset)
			
 
				-			return 0;
			
 
				-#endif
			
 
				 	}
			
 
				 
			
 
				 	bctx->found++;
			
@@ -1429,7 +1423,7 @@ static int find_extent_clone(struct send_ctx *sctx,
 
				 		extent_item_pos = 0;
			
 
				 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
			
 
				 				    extent_item_pos, 1, __iterate_backrefs,
			
 
				-				    backref_ctx);
			
 
				+				    backref_ctx, false);
			
 
				 
			
 
				 	if (ret < 0)
			
 
				 		goto out;
			
@@ -4106,8 +4100,8 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int record_ref(struct btrfs_root *root, int num, u64 dir, int index,
			
 
				-		      struct fs_path *name, void *ctx, struct list_head *refs)
			
 
				+static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
			
 
				+		      void *ctx, struct list_head *refs)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	struct send_ctx *sctx = ctx;
			
@@ -4143,8 +4137,7 @@ static int __record_new_ref(int num, u64 dir, int index,
 
				 			    void *ctx)
			
 
				 {
			
 
				 	struct send_ctx *sctx = ctx;
			
 
				-	return record_ref(sctx->send_root, num, dir, index, name,
			
 
				-			  ctx, &sctx->new_refs);
			
 
				+	return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -4153,8 +4146,8 @@ static int __record_deleted_ref(int num, u64 dir, int index,
 
				 				void *ctx)
			
 
				 {
			
 
				 	struct send_ctx *sctx = ctx;
			
 
				-	return record_ref(sctx->parent_root, num, dir, index, name,
			
 
				-			  ctx, &sctx->deleted_refs);
			
 
				+	return record_ref(sctx->parent_root, dir, name, ctx,
			
 
				+			  &sctx->deleted_refs);
			
 
				 }
			
 
				 
			
 
				 static int record_new_ref(struct send_ctx *sctx)
			
@@ -4498,7 +4491,7 @@ static int process_new_xattr(struct send_ctx *sctx)
 
				 	int ret = 0;
			
 
				 
			
 
				 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
			
 
				-			       sctx->cmp_key, __process_new_xattr, sctx);
			
 
				+			       __process_new_xattr, sctx);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
@@ -4506,7 +4499,7 @@ static int process_new_xattr(struct send_ctx *sctx)
 
				 static int process_deleted_xattr(struct send_ctx *sctx)
			
 
				 {
			
 
				 	return iterate_dir_item(sctx->parent_root, sctx->right_path,
			
 
				-				sctx->cmp_key, __process_deleted_xattr, sctx);
			
 
				+				__process_deleted_xattr, sctx);
			
 
				 }
			
 
				 
			
 
				 struct find_xattr_ctx {
			
@@ -4551,7 +4544,7 @@ static int find_xattr(struct btrfs_root *root,
 
				 	ctx.found_data = NULL;
			
 
				 	ctx.found_data_len = 0;
			
 
				 
			
 
				-	ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
			
 
				+	ret = iterate_dir_item(root, path, __find_xattr, &ctx);
			
 
				 	if (ret < 0)
			
 
				 		return ret;
			
 
				 
			
@@ -4621,11 +4614,11 @@ static int process_changed_xattr(struct send_ctx *sctx)
 
				 	int ret = 0;
			
 
				 
			
 
				 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
			
 
				-			sctx->cmp_key, __process_changed_new_xattr, sctx);
			
 
				+			__process_changed_new_xattr, sctx);
			
 
				 	if (ret < 0)
			
 
				 		goto out;
			
 
				 	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
			
 
				-			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
			
 
				+			__process_changed_deleted_xattr, sctx);
			
 
				 
			
 
				 out:
			
 
				 	return ret;
			
@@ -4675,8 +4668,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
 
				 			goto out;
			
 
				 		}
			
 
				 
			
 
				-		ret = iterate_dir_item(root, path, &found_key,
			
 
				-				       __process_new_xattr, sctx);
			
 
				+		ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
			
 
				 		if (ret < 0)
			
 
				 			goto out;
			
 
				 
			
@@ -4723,16 +4715,27 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
 
				 	/* initial readahead */
			
 
				 	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
			
 
				 	file_ra_state_init(&sctx->ra, inode->i_mapping);
			
 
				-	page_cache_sync_readahead(inode->i_mapping, &sctx->ra, NULL, index,
			
 
				-		       last_index - index + 1);
			
 
				 
			
 
				 	while (index <= last_index) {
			
 
				 		unsigned cur_len = min_t(unsigned, len,
			
 
				 					 PAGE_SIZE - pg_offset);
			
 
				-		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
			
 
				+
			
 
				+		page = find_lock_page(inode->i_mapping, index);
			
 
				 		if (!page) {
			
 
				-			ret = -ENOMEM;
			
 
				-			break;
			
 
				+			page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
			
 
				+				NULL, index, last_index + 1 - index);
			
 
				+
			
 
				+			page = find_or_create_page(inode->i_mapping, index,
			
 
				+					GFP_KERNEL);
			
 
				+			if (!page) {
			
 
				+				ret = -ENOMEM;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (PageReadahead(page)) {
			
 
				+			page_cache_async_readahead(inode->i_mapping, &sctx->ra,
			
 
				+				NULL, page, index, last_index + 1 - index);
			
 
				 		}
			
 
				 
			
 
				 		if (!PageUptodate(page)) {
			
@@ -6162,9 +6165,7 @@ out:
 
				  * Updates compare related fields in sctx and simply forwards to the actual
			
 
				  * changed_xxx functions.
			
 
				  */
			
 
				-static int changed_cb(struct btrfs_root *left_root,
			
 
				-		      struct btrfs_root *right_root,
			
 
				-		      struct btrfs_path *left_path,
			
 
				+static int changed_cb(struct btrfs_path *left_path,
			
 
				 		      struct btrfs_path *right_path,
			
 
				 		      struct btrfs_key *key,
			
 
				 		      enum btrfs_compare_tree_result result,
			
@@ -6246,8 +6247,8 @@ static int full_send_tree(struct send_ctx *sctx)
 
				 		slot = path->slots[0];
			
 
				 		btrfs_item_key_to_cpu(eb, &found_key, slot);
			
 
				 
			
 
				-		ret = changed_cb(send_root, NULL, path, NULL,
			
 
				-				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
			
 
				+		ret = changed_cb(path, NULL, &found_key,
			
 
				+				 BTRFS_COMPARE_TREE_NEW, sctx);
			
 
				 		if (ret < 0)
			
 
				 			goto out;
			
 
				 
			
@@ -6365,13 +6366,12 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
 
				 	spin_unlock(&root->root_item_lock);
			
 
				 }
			
 
				 
			
 
				-long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
			
 
				+long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
			
 
				 	struct btrfs_fs_info *fs_info = send_root->fs_info;
			
 
				 	struct btrfs_root *clone_root;
			
 
				-	struct btrfs_ioctl_send_args *arg = NULL;
			
 
				 	struct btrfs_key key;
			
 
				 	struct send_ctx *sctx = NULL;
			
 
				 	u32 i;
			
@@ -6407,13 +6407,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	arg = memdup_user(arg_, sizeof(*arg));
			
 
				-	if (IS_ERR(arg)) {
			
 
				-		ret = PTR_ERR(arg);
			
 
				-		arg = NULL;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				 	/*
			
 
				 	 * Check that we don't overflow at later allocations, we request
			
 
				 	 * clone_sources_count + 1 items, and compare to unsigned long inside
			
@@ -6654,7 +6647,6 @@ out:
 
				 	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
			
 
				 		btrfs_root_dec_send_in_progress(sctx->parent_root);
			
 
				 
			
 
				-	kfree(arg);
			
 
				 	kvfree(clone_sources_tmp);
			
 
				 
			
 
				 	if (sctx) {
			
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -130,5 +130,5 @@ enum {
 
				 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
			
 
				 
			
 
				 #ifdef __KERNEL__
			
 
				-long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
			
 
				+long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
			
 
				 #endif
			
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -202,7 +202,6 @@ static struct ratelimit_state printk_limits[] = {
 
				 
			
 
				 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
			
 
				 {
			
 
				-	struct super_block *sb = fs_info->sb;
			
 
				 	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
			
 
				 	struct va_format vaf;
			
 
				 	va_list args;
			
@@ -228,7 +227,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 
				 	vaf.va = &args;
			
 
				 
			
 
				 	if (__ratelimit(ratelimit))
			
 
				-		printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
			
 
				+		printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
			
 
				+			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
			
 
				 
			
 
				 	va_end(args);
			
 
				 }
			
@@ -292,7 +292,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 
				 	vaf.va = &args;
			
 
				 
			
 
				 	errstr = btrfs_decode_error(errno);
			
 
				-	if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
			
 
				+	if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
			
 
				 		panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
			
 
				 			s_id, function, line, &vaf, errno, errstr);
			
 
				 
			
@@ -325,6 +325,9 @@ enum {
 
				 	Opt_nologreplay, Opt_norecovery,
			
 
				 #ifdef CONFIG_BTRFS_DEBUG
			
 
				 	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
			
 
				+#endif
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+	Opt_ref_verify,
			
 
				 #endif
			
 
				 	Opt_err,
			
 
				 };
			
@@ -386,6 +389,9 @@ static const match_table_t tokens = {
 
				 	{Opt_fragment_data, "fragment=data"},
			
 
				 	{Opt_fragment_metadata, "fragment=metadata"},
			
 
				 	{Opt_fragment_all, "fragment=all"},
			
 
				+#endif
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+	{Opt_ref_verify, "ref_verify"},
			
 
				 #endif
			
 
				 	{Opt_err, NULL},
			
 
				 };
			
@@ -502,6 +508,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 
				 			    strncmp(args[0].from, "zlib", 4) == 0) {
			
 
				 				compress_type = "zlib";
			
 
				 				info->compress_type = BTRFS_COMPRESS_ZLIB;
			
 
				+				info->compress_level =
			
 
				+					btrfs_compress_str2level(args[0].from);
			
 
				 				btrfs_set_opt(info->mount_opt, COMPRESS);
			
 
				 				btrfs_clear_opt(info->mount_opt, NODATACOW);
			
 
				 				btrfs_clear_opt(info->mount_opt, NODATASUM);
			
@@ -549,9 +557,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 
				 			      compress_force != saved_compress_force)) ||
			
 
				 			    (!btrfs_test_opt(info, COMPRESS) &&
			
 
				 			     no_compress == 1)) {
			
 
				-				btrfs_info(info, "%s %s compression",
			
 
				+				btrfs_info(info, "%s %s compression, level %d",
			
 
				 					   (compress_force) ? "force" : "use",
			
 
				-					   compress_type);
			
 
				+					   compress_type, info->compress_level);
			
 
				 			}
			
 
				 			compress_force = false;
			
 
				 			break;
			
@@ -824,6 +832,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 
				 			btrfs_info(info, "fragmenting data");
			
 
				 			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
			
 
				 			break;
			
 
				+#endif
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+		case Opt_ref_verify:
			
 
				+			btrfs_info(info, "doing ref verification");
			
 
				+			btrfs_set_opt(info->mount_opt, REF_VERIFY);
			
 
				+			break;
			
 
				 #endif
			
 
				 		case Opt_err:
			
 
				 			btrfs_info(info, "unrecognized mount option '%s'", p);
			
@@ -1205,8 +1219,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
				 			 * happens. The pending operations are delayed to the
			
 
				 			 * next commit after thawing.
			
 
				 			 */
			
 
				-			if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
			
 
				-				__sb_end_write(sb, SB_FREEZE_WRITE);
			
 
				+			if (sb_start_write_trylock(sb))
			
 
				+				sb_end_write(sb);
			
 
				 			else
			
 
				 				return 0;
			
 
				 			trans = btrfs_start_transaction(root, 0);
			
@@ -1246,6 +1260,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 
				 			seq_printf(seq, ",compress-force=%s", compress_type);
			
 
				 		else
			
 
				 			seq_printf(seq, ",compress=%s", compress_type);
			
 
				+		if (info->compress_level)
			
 
				+			seq_printf(seq, ":%d", info->compress_level);
			
 
				 	}
			
 
				 	if (btrfs_test_opt(info, NOSSD))
			
 
				 		seq_puts(seq, ",nossd");
			
@@ -1305,6 +1321,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 
				 	if (btrfs_test_opt(info, FRAGMENT_METADATA))
			
 
				 		seq_puts(seq, ",fragment=metadata");
			
 
				 #endif
			
 
				+	if (btrfs_test_opt(info, REF_VERIFY))
			
 
				+		seq_puts(seq, ",ref_verify");
			
 
				 	seq_printf(seq, ",subvolid=%llu",
			
 
				 		  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
			
 
				 	seq_puts(seq, ",subvol=");
			
@@ -2112,7 +2130,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 	 * succeed even if the Avail is zero. But this is better than the other
			
 
				 	 * way around.
			
 
				 	 */
			
 
				-	thresh = 4 * 1024 * 1024;
			
 
				+	thresh = SZ_4M;
			
 
				 
			
 
				 	if (!mixed && total_free_meta - thresh < block_rsv->size)
			
 
				 		buf->f_bavail = 0;
			
@@ -2318,6 +2336,9 @@ static void btrfs_print_mod_info(void)
 
				 #endif
			
 
				 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
			
 
				 			", integrity-checker=on"
			
 
				+#endif
			
 
				+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
			
 
				+			", ref-verify=on"
			
 
				 #endif
			
 
				 			"\n",
			
 
				 			btrfs_crc32c_impl());
			
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -247,7 +247,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj,
 
				 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
			
 
				 	return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
			
 
				 }
			
 
				-BTRFS_ATTR(global_rsv_size, global_rsv_size_show);
			
 
				+BTRFS_ATTR(allocation, global_rsv_size, global_rsv_size_show);
			
 
				 
			
 
				 static ssize_t global_rsv_reserved_show(struct kobject *kobj,
			
 
				 					struct kobj_attribute *a, char *buf)
			
@@ -256,15 +256,15 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj,
 
				 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
			
 
				 	return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
			
 
				 }
			
 
				-BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show);
			
 
				+BTRFS_ATTR(allocation, global_rsv_reserved, global_rsv_reserved_show);
			
 
				 
			
 
				 #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
			
 
				 #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
			
 
				 
			
 
				 static ssize_t raid_bytes_show(struct kobject *kobj,
			
 
				 			       struct kobj_attribute *attr, char *buf);
			
 
				-BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
			
 
				-BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
			
 
				+BTRFS_ATTR(raid, total_bytes, raid_bytes_show);
			
 
				+BTRFS_ATTR(raid, used_bytes, raid_bytes_show);
			
 
				 
			
 
				 static ssize_t raid_bytes_show(struct kobject *kobj,
			
 
				 			       struct kobj_attribute *attr, char *buf)
			
@@ -277,7 +277,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 
				 
			
 
				 	down_read(&sinfo->groups_sem);
			
 
				 	list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
			
 
				-		if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
			
 
				+		if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
			
 
				 			val += block_group->key.offset;
			
 
				 		else
			
 
				 			val += btrfs_block_group_used(&block_group->item);
			
@@ -287,8 +287,8 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 
				 }
			
 
				 
			
 
				 static struct attribute *raid_attributes[] = {
			
 
				-	BTRFS_RAID_ATTR_PTR(total_bytes),
			
 
				-	BTRFS_RAID_ATTR_PTR(used_bytes),
			
 
				+	BTRFS_ATTR_PTR(raid, total_bytes),
			
 
				+	BTRFS_ATTR_PTR(raid, used_bytes),
			
 
				 	NULL
			
 
				 };
			
 
				 
			
@@ -311,7 +311,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,	\
 
				 	struct btrfs_space_info *sinfo = to_space_info(kobj);		\
			
 
				 	return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf);	\
			
 
				 }									\
			
 
				-BTRFS_ATTR(field, btrfs_space_info_show_##field)
			
 
				+BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
			
 
				 
			
 
				 static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
			
 
				 						       struct kobj_attribute *a,
			
@@ -331,19 +331,20 @@ SPACE_INFO_ATTR(bytes_may_use);
 
				 SPACE_INFO_ATTR(bytes_readonly);
			
 
				 SPACE_INFO_ATTR(disk_used);
			
 
				 SPACE_INFO_ATTR(disk_total);
			
 
				-BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned);
			
 
				+BTRFS_ATTR(space_info, total_bytes_pinned,
			
 
				+	   btrfs_space_info_show_total_bytes_pinned);
			
 
				 
			
 
				 static struct attribute *space_info_attrs[] = {
			
 
				-	BTRFS_ATTR_PTR(flags),
			
 
				-	BTRFS_ATTR_PTR(total_bytes),
			
 
				-	BTRFS_ATTR_PTR(bytes_used),
			
 
				-	BTRFS_ATTR_PTR(bytes_pinned),
			
 
				-	BTRFS_ATTR_PTR(bytes_reserved),
			
 
				-	BTRFS_ATTR_PTR(bytes_may_use),
			
 
				-	BTRFS_ATTR_PTR(bytes_readonly),
			
 
				-	BTRFS_ATTR_PTR(disk_used),
			
 
				-	BTRFS_ATTR_PTR(disk_total),
			
 
				-	BTRFS_ATTR_PTR(total_bytes_pinned),
			
 
				+	BTRFS_ATTR_PTR(space_info, flags),
			
 
				+	BTRFS_ATTR_PTR(space_info, total_bytes),
			
 
				+	BTRFS_ATTR_PTR(space_info, bytes_used),
			
 
				+	BTRFS_ATTR_PTR(space_info, bytes_pinned),
			
 
				+	BTRFS_ATTR_PTR(space_info, bytes_reserved),
			
 
				+	BTRFS_ATTR_PTR(space_info, bytes_may_use),
			
 
				+	BTRFS_ATTR_PTR(space_info, bytes_readonly),
			
 
				+	BTRFS_ATTR_PTR(space_info, disk_used),
			
 
				+	BTRFS_ATTR_PTR(space_info, disk_total),
			
 
				+	BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
			
 
				 	NULL,
			
 
				 };
			
 
				 
			
@@ -361,8 +362,8 @@ struct kobj_type space_info_ktype = {
 
				 };
			
 
				 
			
 
				 static const struct attribute *allocation_attrs[] = {
			
 
				-	BTRFS_ATTR_PTR(global_rsv_reserved),
			
 
				-	BTRFS_ATTR_PTR(global_rsv_size),
			
 
				+	BTRFS_ATTR_PTR(allocation, global_rsv_reserved),
			
 
				+	BTRFS_ATTR_PTR(allocation, global_rsv_size),
			
 
				 	NULL,
			
 
				 };
			
 
				 
			
@@ -415,7 +416,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
 
				 
			
 
				 	return len;
			
 
				 }
			
 
				-BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store);
			
 
				+BTRFS_ATTR_RW(, label, btrfs_label_show, btrfs_label_store);
			
 
				 
			
 
				 static ssize_t btrfs_nodesize_show(struct kobject *kobj,
			
 
				 				struct kobj_attribute *a, char *buf)
			
@@ -425,7 +426,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
 
				 	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
			
 
				 }
			
 
				 
			
 
				-BTRFS_ATTR(nodesize, btrfs_nodesize_show);
			
 
				+BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
			
 
				 
			
 
				 static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
			
 
				 				struct kobj_attribute *a, char *buf)
			
@@ -436,7 +437,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
 
				 			fs_info->super_copy->sectorsize);
			
 
				 }
			
 
				 
			
 
				-BTRFS_ATTR(sectorsize, btrfs_sectorsize_show);
			
 
				+BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
			
 
				 
			
 
				 static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
			
 
				 				struct kobj_attribute *a, char *buf)
			
@@ -447,7 +448,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
 
				 			fs_info->super_copy->sectorsize);
			
 
				 }
			
 
				 
			
 
				-BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
			
 
				+BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
			
 
				 
			
 
				 static ssize_t quota_override_show(struct kobject *kobj,
			
 
				 				   struct kobj_attribute *a, char *buf)
			
@@ -487,14 +488,14 @@ static ssize_t quota_override_store(struct kobject *kobj,
 
				 	return len;
			
 
				 }
			
 
				 
			
 
				-BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store);
			
 
				+BTRFS_ATTR_RW(, quota_override, quota_override_show, quota_override_store);
			
 
				 
			
 
				 static const struct attribute *btrfs_attrs[] = {
			
 
				-	BTRFS_ATTR_PTR(label),
			
 
				-	BTRFS_ATTR_PTR(nodesize),
			
 
				-	BTRFS_ATTR_PTR(sectorsize),
			
 
				-	BTRFS_ATTR_PTR(clone_alignment),
			
 
				-	BTRFS_ATTR_PTR(quota_override),
			
 
				+	BTRFS_ATTR_PTR(, label),
			
 
				+	BTRFS_ATTR_PTR(, nodesize),
			
 
				+	BTRFS_ATTR_PTR(, sectorsize),
			
 
				+	BTRFS_ATTR_PTR(, clone_alignment),
			
 
				+	BTRFS_ATTR_PTR(, quota_override),
			
 
				 	NULL,
			
 
				 };
			
 
				 
			
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -21,21 +21,16 @@ enum btrfs_feature_set {
 
				 	.store	= _store,						\
			
 
				 }
			
 
				 
			
 
				-#define BTRFS_ATTR_RW(_name, _show, _store)			\
			
 
				-	static struct kobj_attribute btrfs_attr_##_name =		\
			
 
				+#define BTRFS_ATTR_RW(_prefix, _name, _show, _store)			\
			
 
				+	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
			
 
				 			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
			
 
				 
			
 
				-#define BTRFS_ATTR(_name, _show)					\
			
 
				-	static struct kobj_attribute btrfs_attr_##_name =		\
			
 
				+#define BTRFS_ATTR(_prefix, _name, _show)				\
			
 
				+	static struct kobj_attribute btrfs_attr_##_prefix##_##_name =	\
			
 
				 			__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
			
 
				 
			
 
				-#define BTRFS_ATTR_PTR(_name)    (&btrfs_attr_##_name.attr)
			
 
				-
			
 
				-#define BTRFS_RAID_ATTR(_name, _show)					\
			
 
				-	static struct kobj_attribute btrfs_raid_attr_##_name =		\
			
 
				-			__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
			
 
				-
			
 
				-#define BTRFS_RAID_ATTR_PTR(_name)    (&btrfs_raid_attr_##_name.attr)
			
 
				+#define BTRFS_ATTR_PTR(_prefix, _name)					\
			
 
				+	(&btrfs_attr_##_prefix##_##_name.attr)
			
 
				 
			
 
				 
			
 
				 struct btrfs_feature_attr {
			
@@ -44,15 +39,16 @@ struct btrfs_feature_attr {
 
				 	u64 feature_bit;
			
 
				 };
			
 
				 
			
 
				-#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit)	     \
			
 
				-static struct btrfs_feature_attr btrfs_attr_##_name = {			     \
			
 
				+#define BTRFS_FEAT_ATTR(_name, _feature_set, _feature_prefix, _feature_bit)  \
			
 
				+static struct btrfs_feature_attr btrfs_attr_features_##_name = {	     \
			
 
				 	.kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO,			     \
			
 
				 				      btrfs_feature_attr_show,		     \
			
 
				 				      btrfs_feature_attr_store),	     \
			
 
				 	.feature_set	= _feature_set,					     \
			
 
				-	.feature_bit	= _prefix ##_## _feature_bit,			     \
			
 
				+	.feature_bit	= _feature_prefix ##_## _feature_bit,		     \
			
 
				 }
			
 
				-#define BTRFS_FEAT_ATTR_PTR(_name)    (&btrfs_attr_##_name.kobj_attr.attr)
			
 
				+#define BTRFS_FEAT_ATTR_PTR(_name)					     \
			
 
				+	(&btrfs_attr_features_##_name.kobj_attr.attr)
			
 
				 
			
 
				 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
			
 
				 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
			
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -500,7 +500,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
 
				 	path = btrfs_alloc_path();
			
 
				 	if (!path) {
			
 
				 		test_msg("Couldn't allocate path\n");
			
 
				-		return -ENOMEM;
			
 
				+		ret = -ENOMEM;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				 	ret = add_block_group_free_space(&trans, root->fs_info, cache);
			
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 
				 	offset = em->start + em->len;
			
 
				 	free_extent_map(em);
			
 
				 
			
 
				-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0);
			
 
				+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
			
 
				 	if (IS_ERR(em)) {
			
 
				 		test_msg("Got an error when we shouldn't have\n");
			
 
				 		goto out;
			
@@ -968,7 +968,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	btrfs_test_inode_set_ops(inode);
			
 
				 
			
 
				 	/* [BTRFS_MAX_EXTENT_SIZE] */
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				 	ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
			
 
				 					NULL, 0);
			
 
				 	if (ret) {
			
@@ -983,7 +982,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	}
			
 
				 
			
 
				 	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
			
 
				 					BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
			
 
				 					NULL, 0);
			
@@ -1003,7 +1001,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 			       BTRFS_MAX_EXTENT_SIZE >> 1,
			
 
				 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
			
 
				 			       EXTENT_DELALLOC | EXTENT_DIRTY |
			
 
				-			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
			
 
				+			       EXTENT_UPTODATE, 0, 0,
			
 
				 			       NULL, GFP_KERNEL);
			
 
				 	if (ret) {
			
 
				 		test_msg("clear_extent_bit returned %d\n", ret);
			
@@ -1017,7 +1015,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	}
			
 
				 
			
 
				 	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
			
 
				 					(BTRFS_MAX_EXTENT_SIZE >> 1)
			
 
				 					+ sectorsize - 1,
			
@@ -1035,12 +1032,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 
			
 
				 	/*
			
 
				 	 * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
			
 
				-	 *
			
 
				-	 * I'm artificially adding 2 to outstanding_extents because in the
			
 
				-	 * buffered IO case we'd add things up as we go, but I don't feel like
			
 
				-	 * doing that here, this isn't the interesting case we want to test.
			
 
				 	 */
			
 
				-	BTRFS_I(inode)->outstanding_extents += 2;
			
 
				 	ret = btrfs_set_extent_delalloc(inode,
			
 
				 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
			
 
				 			(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
			
@@ -1059,7 +1051,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	/*
			
 
				 	* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
			
 
				 	*/
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				 	ret = btrfs_set_extent_delalloc(inode,
			
 
				 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
			
 
				 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
			
@@ -1079,7 +1070,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 			       BTRFS_MAX_EXTENT_SIZE + sectorsize,
			
 
				 			       BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
			
 
				 			       EXTENT_DIRTY | EXTENT_DELALLOC |
			
 
				-			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
			
 
				+			       EXTENT_UPTODATE, 0, 0,
			
 
				 			       NULL, GFP_KERNEL);
			
 
				 	if (ret) {
			
 
				 		test_msg("clear_extent_bit returned %d\n", ret);
			
@@ -1096,7 +1087,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	 * Refill the hole again just for good measure, because I thought it
			
 
				 	 * might fail and I'd rather satisfy my paranoia at this point.
			
 
				 	 */
			
 
				-	BTRFS_I(inode)->outstanding_extents++;
			
 
				 	ret = btrfs_set_extent_delalloc(inode,
			
 
				 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
			
 
				 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
			
@@ -1114,7 +1104,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
				 	/* Empty */
			
 
				 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
			
 
				 			       EXTENT_DIRTY | EXTENT_DELALLOC |
			
 
				-			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
			
 
				+			       EXTENT_UPTODATE, 0, 0,
			
 
				 			       NULL, GFP_KERNEL);
			
 
				 	if (ret) {
			
 
				 		test_msg("clear_extent_bit returned %d\n", ret);
			
@@ -1131,7 +1121,7 @@ out:
 
				 	if (ret)
			
 
				 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
			
 
				 				 EXTENT_DIRTY | EXTENT_DELALLOC |
			
 
				-				 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
			
 
				+				 EXTENT_UPTODATE, 0, 0,
			
 
				 				 NULL, GFP_KERNEL);
			
 
				 	iput(inode);
			
 
				 	btrfs_free_dummy_root(root);
			
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -240,7 +240,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 
				 	 * we can only call btrfs_qgroup_account_extent() directly to test
			
 
				 	 * quota.
			
 
				 	 */
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		test_msg("Couldn't find old roots: %d\n", ret);
			
@@ -252,7 +253,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		ulist_free(new_roots);
			
@@ -275,7 +277,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 
				 	old_roots = NULL;
			
 
				 	new_roots = NULL;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		test_msg("Couldn't find old roots: %d\n", ret);
			
@@ -286,7 +289,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 
				 	if (ret)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		ulist_free(new_roots);
			
@@ -337,7 +341,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 		return ret;
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		test_msg("Couldn't find old roots: %d\n", ret);
			
@@ -349,7 +354,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		ulist_free(new_roots);
			
@@ -370,7 +376,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		test_msg("Couldn't find old roots: %d\n", ret);
			
@@ -382,7 +389,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		ulist_free(new_roots);
			
@@ -409,7 +417,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		test_msg("Couldn't find old roots: %d\n", ret);
			
@@ -421,7 +430,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
			
 
				+	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
			
 
				+			false);
			
 
				 	if (ret) {
			
 
				 		ulist_free(old_roots);
			
 
				 		ulist_free(new_roots);
			
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -797,8 +797,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
 
				 {
			
 
				 	struct btrfs_fs_info *fs_info = trans->fs_info;
			
 
				 
			
 
				-	if (fs_info->global_block_rsv.space_info->full &&
			
 
				-	    btrfs_check_space_for_delayed_refs(trans, fs_info))
			
 
				+	if (btrfs_check_space_for_delayed_refs(trans, fs_info))
			
 
				 		return 1;
			
 
				 
			
 
				 	return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
			
@@ -950,6 +949,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
 
				 	u64 start = 0;
			
 
				 	u64 end;
			
 
				 
			
 
				+	atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers);
			
 
				 	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
			
 
				 				      mark, &cached_state)) {
			
 
				 		bool wait_writeback = false;
			
@@ -985,6 +985,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
 
				 		cond_resched();
			
 
				 		start = end + 1;
			
 
				 	}
			
 
				+	atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers);
			
 
				 	return werr;
			
 
				 }
			
 
				 
			
@@ -1915,8 +1916,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
 
				 
			
 
				 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
			
 
				 {
			
 
				+	/*
			
 
				+	 * We use writeback_inodes_sb here because if we used
			
 
				+	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
			
 
				+	 * Currently we are holding the fs freeze lock, if we do an async flush
			
 
				+	 * we'll do btrfs_join_transaction() and deadlock because we need to
			
 
				+	 * wait for the fs freeze lock.  Using the direct flushing we benefit
			
 
				+	 * from already being in a transaction and our join_transaction doesn't
			
 
				+	 * have to re-take the fs freeze lock.
			
 
				+	 */
			
 
				 	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
			
 
				-		return btrfs_start_delalloc_roots(fs_info, 1, -1);
			
 
				+		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -0,0 +1,425 @@
 
				+/*
			
 
				+ * Copyright (C) Qu Wenruo 2017.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License v2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * The module is used to catch unexpected/corrupted tree block data.
			
 
				+ * Such behavior can be caused either by a fuzzed image or bugs.
			
 
				+ *
			
 
				+ * The objective is to do leaf/node validation checks when tree block is read
			
 
				+ * from disk, and check *every* possible member, so other code won't
			
 
				+ * need to check them again.
			
 
				+ *
			
 
				+ * Due to the potential and unwanted damage, every checker needs to be
			
 
				+ * carefully reviewed, otherwise it may prevent mounting of valid images.
			
 
				+ */
			
 
				+
			
 
				+#include "ctree.h"
			
 
				+#include "tree-checker.h"
			
 
				+#include "disk-io.h"
			
 
				+#include "compression.h"
			
 
				+
			
 
				+/*
			
 
				+ * Error message should follow the following format:
			
 
				+ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
			
 
				+ *
			
 
				+ * @type:	leaf or node
			
 
				+ * @identifier:	the necessary info to locate the leaf/node.
			
 
				+ * 		It's recommended to decode key.objectid/offset if it's
			
 
				+ * 		meaningful.
			
 
				+ * @reason:	describe the error
			
 
				+ * @bad_value:	optional, it's recommended to output bad value and its
			
 
				+ *		expected value (range).
			
 
				+ *
			
 
				+ * Since comma is used to separate the components, only space is allowed
			
 
				+ * inside each component.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Prepend generic "corrupt leaf/node: root=%llu block=%llu slot=%d, " to @fmt.
			
 
				+ * Allows callers to customize the output.
			
 
				+ */
			
 
				+__printf(4, 5)
			
 
				+static void generic_err(const struct btrfs_root *root,
			
 
				+			const struct extent_buffer *eb, int slot,
			
 
				+			const char *fmt, ...)
			
 
				+{
			
 
				+	struct va_format vaf;
			
 
				+	va_list args;
			
 
				+
			
 
				+	va_start(args, fmt);
			
 
				+
			
 
				+	vaf.fmt = fmt;
			
 
				+	vaf.va = &args;
			
 
				+
			
 
				+	btrfs_crit(root->fs_info,
			
 
				+		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
			
 
				+		btrfs_header_level(eb) == 0 ? "leaf" : "node",
			
 
				+		root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
			
 
				+	va_end(args);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Customized reporter for extent data item, since its key objectid and
			
 
				+ * offset have their own meaning.
			
 
				+ */
			
 
				+__printf(4, 5)
			
 
				+static void file_extent_err(const struct btrfs_root *root,
			
 
				+			    const struct extent_buffer *eb, int slot,
			
 
				+			    const char *fmt, ...)
			
 
				+{
			
 
				+	struct btrfs_key key;
			
 
				+	struct va_format vaf;
			
 
				+	va_list args;
			
 
				+
			
 
				+	btrfs_item_key_to_cpu(eb, &key, slot);
			
 
				+	va_start(args, fmt);
			
 
				+
			
 
				+	vaf.fmt = fmt;
			
 
				+	vaf.va = &args;
			
 
				+
			
 
				+	btrfs_crit(root->fs_info,
			
 
				+	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
			
 
				+		btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
			
 
				+		btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf);
			
 
				+	va_end(args);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
			
 
				+ * Else return 1
			
 
				+ */
			
 
				+#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment)		      \
			
 
				+({									      \
			
 
				+	if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
			
 
				+		file_extent_err((root), (leaf), (slot),			      \
			
 
				+	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			
 
				+			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			
 
				+			(alignment));					      \
			
 
				+	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
			
 
				+})
			
 
				+
			
 
				+static int check_extent_data_item(struct btrfs_root *root,
			
 
				+				  struct extent_buffer *leaf,
			
 
				+				  struct btrfs_key *key, int slot)
			
 
				+{
			
 
				+	struct btrfs_file_extent_item *fi;
			
 
				+	u32 sectorsize = root->fs_info->sectorsize;
			
 
				+	u32 item_size = btrfs_item_size_nr(leaf, slot);
			
 
				+
			
 
				+	if (!IS_ALIGNED(key->offset, sectorsize)) {
			
 
				+		file_extent_err(root, leaf, slot,
			
 
				+"unaligned file_offset for file extent, have %llu should be aligned to %u",
			
 
				+			key->offset, sectorsize);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+
			
 
				+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
			
 
				+
			
 
				+	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
			
 
				+		file_extent_err(root, leaf, slot,
			
 
				+		"invalid type for file extent, have %u expect range [0, %u]",
			
 
				+			btrfs_file_extent_type(leaf, fi),
			
 
				+			BTRFS_FILE_EXTENT_TYPES);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Support for new compression/encryption must introduce incompat flag,
			
 
				+	 * and must be caught in open_ctree().
			
 
				+	 */
			
 
				+	if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
			
 
				+		file_extent_err(root, leaf, slot,
			
 
				+	"invalid compression for file extent, have %u expect range [0, %u]",
			
 
				+			btrfs_file_extent_compression(leaf, fi),
			
 
				+			BTRFS_COMPRESS_TYPES);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	if (btrfs_file_extent_encryption(leaf, fi)) {
			
 
				+		file_extent_err(root, leaf, slot,
			
 
				+			"invalid encryption for file extent, have %u expect 0",
			
 
				+			btrfs_file_extent_encryption(leaf, fi));
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
			
 
				+		/* Inline extent must have 0 as key offset */
			
 
				+		if (key->offset) {
			
 
				+			file_extent_err(root, leaf, slot,
			
 
				+		"invalid file_offset for inline file extent, have %llu expect 0",
			
 
				+				key->offset);
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+
			
 
				+		/* Compressed inline extent has no on-disk size, skip it */
			
 
				+		if (btrfs_file_extent_compression(leaf, fi) !=
			
 
				+		    BTRFS_COMPRESS_NONE)
			
 
				+			return 0;
			
 
				+
			
 
				+		/* Uncompressed inline extent size must match item size */
			
 
				+		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
			
 
				+		    btrfs_file_extent_ram_bytes(leaf, fi)) {
			
 
				+			file_extent_err(root, leaf, slot,
			
 
				+	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
			
 
				+				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
			
 
				+				btrfs_file_extent_ram_bytes(leaf, fi));
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	/* Regular or preallocated extent has fixed item size */
			
 
				+	if (item_size != sizeof(*fi)) {
			
 
				+		file_extent_err(root, leaf, slot,
			
 
				+	"invalid item size for reg/prealloc file extent, have %u expect %zu",
			
 
				+			item_size, sizeof(*fi));
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) ||
			
 
				+	    CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) ||
			
 
				+	    CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) ||
			
 
				+	    CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) ||
			
 
				+	    CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize))
			
 
				+		return -EUCLEAN;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
			
 
				+			   struct btrfs_key *key, int slot)
			
 
				+{
			
 
				+	u32 sectorsize = root->fs_info->sectorsize;
			
 
				+	u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
			
 
				+
			
 
				+	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
			
 
				+		generic_err(root, leaf, slot,
			
 
				+		"invalid key objectid for csum item, have %llu expect %llu",
			
 
				+			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	if (!IS_ALIGNED(key->offset, sectorsize)) {
			
 
				+		generic_err(root, leaf, slot,
			
 
				+	"unaligned key offset for csum item, have %llu should be aligned to %u",
			
 
				+			key->offset, sectorsize);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
			
 
				+		generic_err(root, leaf, slot,
			
 
				+	"unaligned item size for csum item, have %u should be aligned to %u",
			
 
				+			btrfs_item_size_nr(leaf, slot), csumsize);
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Common point to switch the item-specific validation.
			
 
				+ */
			
 
				+static int check_leaf_item(struct btrfs_root *root,
			
 
				+			   struct extent_buffer *leaf,
			
 
				+			   struct btrfs_key *key, int slot)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	switch (key->type) {
			
 
				+	case BTRFS_EXTENT_DATA_KEY:
			
 
				+		ret = check_extent_data_item(root, leaf, key, slot);
			
 
				+		break;
			
 
				+	case BTRFS_EXTENT_CSUM_KEY:
			
 
				+		ret = check_csum_item(root, leaf, key, slot);
			
 
				+		break;
			
 
				+	}
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
			
 
				+{
			
 
				+	struct btrfs_fs_info *fs_info = root->fs_info;
			
 
				+	/* No valid key type is 0, so all keys should be larger than this key */
			
 
				+	struct btrfs_key prev_key = {0, 0, 0};
			
 
				+	struct btrfs_key key;
			
 
				+	u32 nritems = btrfs_header_nritems(leaf);
			
 
				+	int slot;
			
 
				+
			
 
				+	/*
			
 
				+	 * Extent buffers from a relocation tree have an owner field that
			
 
				+	 * corresponds to the subvolume tree they are based on. So just from an
			
 
				+	 * extent buffer alone we can not find out what is the id of the
			
 
				+	 * corresponding subvolume tree, so we can not figure out if the extent
			
 
				+	 * buffer corresponds to the root of the relocation tree or not. So
			
 
				+	 * skip this check for relocation trees.
			
 
				+	 */
			
 
				+	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
			
 
				+		struct btrfs_root *check_root;
			
 
				+
			
 
				+		key.objectid = btrfs_header_owner(leaf);
			
 
				+		key.type = BTRFS_ROOT_ITEM_KEY;
			
 
				+		key.offset = (u64)-1;
			
 
				+
			
 
				+		check_root = btrfs_get_fs_root(fs_info, &key, false);
			
 
				+		/*
			
 
				+		 * The only reason we also check NULL here is that during
			
 
				+		 * open_ctree() some roots have not yet been set up.
			
 
				+		 */
			
 
				+		if (!IS_ERR_OR_NULL(check_root)) {
			
 
				+			struct extent_buffer *eb;
			
 
				+
			
 
				+			eb = btrfs_root_node(check_root);
			
 
				+			/* if leaf is the root, then it's fine */
			
 
				+			if (leaf != eb) {
			
 
				+				generic_err(check_root, leaf, 0,
			
 
				+		"invalid nritems, have %u should not be 0 for non-root leaf",
			
 
				+					nritems);
			
 
				+				free_extent_buffer(eb);
			
 
				+				return -EUCLEAN;
			
 
				+			}
			
 
				+			free_extent_buffer(eb);
			
 
				+		}
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	if (nritems == 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Check the following things to make sure this is a good leaf, and
			
 
				+	 * leaf users won't need to bother with similar sanity checks:
			
 
				+	 *
			
 
				+	 * 1) key ordering
			
 
				+	 * 2) item offset and size
			
 
				+	 *    No overlap, no hole, all inside the leaf.
			
 
				+	 * 3) item content
			
 
				+	 *    If possible, do comprehensive sanity check.
			
 
				+	 *    NOTE: All checks must only rely on the item data itself.
			
 
				+	 */
			
 
				+	for (slot = 0; slot < nritems; slot++) {
			
 
				+		u32 item_end_expected;
			
 
				+		int ret;
			
 
				+
			
 
				+		btrfs_item_key_to_cpu(leaf, &key, slot);
			
 
				+
			
 
				+		/* Make sure the keys are in the right order */
			
 
				+		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
			
 
				+			generic_err(root, leaf, slot,
			
 
				+	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
			
 
				+				prev_key.objectid, prev_key.type,
			
 
				+				prev_key.offset, key.objectid, key.type,
			
 
				+				key.offset);
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Make sure the offset and ends are right, remember that the
			
 
				+		 * item data starts at the end of the leaf and grows towards the
			
 
				+		 * front.
			
 
				+		 */
			
 
				+		if (slot == 0)
			
 
				+			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
			
 
				+		else
			
 
				+			item_end_expected = btrfs_item_offset_nr(leaf,
			
 
				+								 slot - 1);
			
 
				+		if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
			
 
				+			generic_err(root, leaf, slot,
			
 
				+				"unexpected item end, have %u expect %u",
			
 
				+				btrfs_item_end_nr(leaf, slot),
			
 
				+				item_end_expected);
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Check to make sure that we don't point outside of the leaf,
			
 
				+		 * just in case all the items are consistent to each other, but
			
 
				+		 * all point outside of the leaf.
			
 
				+		 */
			
 
				+		if (btrfs_item_end_nr(leaf, slot) >
			
 
				+		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
			
 
				+			generic_err(root, leaf, slot,
			
 
				+			"slot end outside of leaf, have %u expect range [0, %u]",
			
 
				+				btrfs_item_end_nr(leaf, slot),
			
 
				+				BTRFS_LEAF_DATA_SIZE(fs_info));
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+
			
 
				+		/* Also check if the item pointer overlaps with btrfs item. */
			
 
				+		if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
			
 
				+		    btrfs_item_ptr_offset(leaf, slot)) {
			
 
				+			generic_err(root, leaf, slot,
			
 
				+		"slot overlaps with its data, item end %lu data start %lu",
			
 
				+				btrfs_item_nr_offset(slot) +
			
 
				+				sizeof(struct btrfs_item),
			
 
				+				btrfs_item_ptr_offset(leaf, slot));
			
 
				+			return -EUCLEAN;
			
 
				+		}
			
 
				+
			
 
				+		/* Check if the item size and content meet other criteria */
			
 
				+		ret = check_leaf_item(root, leaf, &key, slot);
			
 
				+		if (ret < 0)
			
 
				+			return ret;
			
 
				+
			
 
				+		prev_key.objectid = key.objectid;
			
 
				+		prev_key.type = key.type;
			
 
				+		prev_key.offset = key.offset;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
			
 
				+{
			
 
				+	unsigned long nr = btrfs_header_nritems(node);
			
 
				+	struct btrfs_key key, next_key;
			
 
				+	int slot;
			
 
				+	u64 bytenr;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
			
 
				+		btrfs_crit(root->fs_info,
			
 
				+"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
			
 
				+			   root->objectid, node->start,
			
 
				+			   nr == 0 ? "small" : "large", nr,
			
 
				+			   BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
			
 
				+		return -EUCLEAN;
			
 
				+	}
			
 
				+
			
 
				+	for (slot = 0; slot < nr - 1; slot++) {
			
 
				+		bytenr = btrfs_node_blockptr(node, slot);
			
 
				+		btrfs_node_key_to_cpu(node, &key, slot);
			
 
				+		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
			
 
				+
			
 
				+		if (!bytenr) {
			
 
				+			generic_err(root, node, slot,
			
 
				+				"invalid NULL node pointer");
			
 
				+			ret = -EUCLEAN;
			
 
				+			goto out;
			
 
				+		}
			
 
				+		if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
			
 
				+			generic_err(root, node, slot,
			
 
				+			"unaligned pointer, have %llu should be aligned to %u",
			
 
				+				bytenr, root->fs_info->sectorsize);
			
 
				+			ret = -EUCLEAN;
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
			
 
				+			generic_err(root, node, slot,
			
 
				+	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
			
 
				+				key.objectid, key.type, key.offset,
			
 
				+				next_key.objectid, next_key.type,
			
 
				+				next_key.offset);
			
 
				+			ret = -EUCLEAN;
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				+out:
			
 
				+	return ret;
			
 
				+}
			
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -0,0 +1,26 @@
 
				+/*
			
 
				+ * Copyright (C) Qu Wenruo 2017.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License v2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public
			
 
				+ * License along with this program.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __BTRFS_TREE_CHECKER__
			
 
				+#define __BTRFS_TREE_CHECKER__
			
 
				+
			
 
				+#include "ctree.h"
			
 
				+#include "extent_io.h"
			
 
				+
			
 
				+int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
			
 
				+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
			
 
				+
			
 
				+#endif
			
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -717,7 +717,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 
				 			ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
			
 
				 						ins.offset);
			
 
				 			if (ret == 0) {
			
 
				-				ret = btrfs_inc_extent_ref(trans, fs_info,
			
 
				+				ret = btrfs_inc_extent_ref(trans, root,
			
 
				 						ins.objectid, ins.offset,
			
 
				 						0, root->root_key.objectid,
			
 
				 						key->objectid, offset);
			
@@ -2699,34 +2699,36 @@ static void wait_log_commit(struct btrfs_root *root, int transid)
 
				 	 * so we know that if ours is more than 2 older than the
			
 
				 	 * current transaction, we're done
			
 
				 	 */
			
 
				-	do {
			
 
				+	for (;;) {
			
 
				 		prepare_to_wait(&root->log_commit_wait[index],
			
 
				 				&wait, TASK_UNINTERRUPTIBLE);
			
 
				-		mutex_unlock(&root->log_mutex);
			
 
				 
			
 
				-		if (root->log_transid_committed < transid &&
			
 
				-		    atomic_read(&root->log_commit[index]))
			
 
				-			schedule();
			
 
				+		if (!(root->log_transid_committed < transid &&
			
 
				+		      atomic_read(&root->log_commit[index])))
			
 
				+			break;
			
 
				 
			
 
				-		finish_wait(&root->log_commit_wait[index], &wait);
			
 
				+		mutex_unlock(&root->log_mutex);
			
 
				+		schedule();
			
 
				 		mutex_lock(&root->log_mutex);
			
 
				-	} while (root->log_transid_committed < transid &&
			
 
				-		 atomic_read(&root->log_commit[index]));
			
 
				+	}
			
 
				+	finish_wait(&root->log_commit_wait[index], &wait);
			
 
				 }
			
 
				 
			
 
				 static void wait_for_writer(struct btrfs_root *root)
			
 
				 {
			
 
				 	DEFINE_WAIT(wait);
			
 
				 
			
 
				-	while (atomic_read(&root->log_writers)) {
			
 
				-		prepare_to_wait(&root->log_writer_wait,
			
 
				-				&wait, TASK_UNINTERRUPTIBLE);
			
 
				+	for (;;) {
			
 
				+		prepare_to_wait(&root->log_writer_wait, &wait,
			
 
				+				TASK_UNINTERRUPTIBLE);
			
 
				+		if (!atomic_read(&root->log_writers))
			
 
				+			break;
			
 
				+
			
 
				 		mutex_unlock(&root->log_mutex);
			
 
				-		if (atomic_read(&root->log_writers))
			
 
				-			schedule();
			
 
				-		finish_wait(&root->log_writer_wait, &wait);
			
 
				+		schedule();
			
 
				 		mutex_lock(&root->log_mutex);
			
 
				 	}
			
 
				+	finish_wait(&root->log_writer_wait, &wait);
			
 
				 }
			
 
				 
			
 
				 static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
			
@@ -4645,7 +4647,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
				 	struct btrfs_key min_key;
			
 
				 	struct btrfs_key max_key;
			
 
				 	struct btrfs_root *log = root->log_root;
			
 
				-	struct extent_buffer *src = NULL;
			
 
				 	LIST_HEAD(logged_list);
			
 
				 	u64 last_extent = 0;
			
 
				 	int err = 0;
			
@@ -4888,7 +4889,6 @@ again:
 
				 			goto next_slot;
			
 
				 		}
			
 
				 
			
 
				-		src = path->nodes[0];
			
 
				 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
			
 
				 			ins_nr++;
			
 
				 			goto next_slot;
			
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -360,7 +360,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 
				 	int again = 0;
			
 
				 	unsigned long num_run;
			
 
				 	unsigned long batch_run = 0;
			
 
				-	unsigned long limit;
			
 
				 	unsigned long last_waited = 0;
			
 
				 	int force_reg = 0;
			
 
				 	int sync_pending = 0;
			
@@ -375,8 +374,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
 
				 	blk_start_plug(&plug);
			
 
				 
			
 
				 	bdi = device->bdev->bd_bdi;
			
 
				-	limit = btrfs_async_submit_limit(fs_info);
			
 
				-	limit = limit * 2 / 3;
			
 
				 
			
 
				 loop:
			
 
				 	spin_lock(&device->io_lock);
			
@@ -443,13 +440,6 @@ loop_lock:
 
				 		pending = pending->bi_next;
			
 
				 		cur->bi_next = NULL;
			
 
				 
			
 
				-		/*
			
 
				-		 * atomic_dec_return implies a barrier for waitqueue_active
			
 
				-		 */
			
 
				-		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
			
 
				-		    waitqueue_active(&fs_info->async_submit_wait))
			
 
				-			wake_up(&fs_info->async_submit_wait);
			
 
				-
			
 
				 		BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
			
 
				 
			
 
				 		/*
			
@@ -517,12 +507,6 @@ loop_lock:
 
				 					 &device->work);
			
 
				 			goto done;
			
 
				 		}
			
 
				-		/* unplug every 64 requests just for good measure */
			
 
				-		if (batch_run % 64 == 0) {
			
 
				-			blk_finish_plug(&plug);
			
 
				-			blk_start_plug(&plug);
			
 
				-			sync_pending = 0;
			
 
				-		}
			
 
				 	}
			
 
				 
			
 
				 	cond_resched();
			
@@ -547,7 +531,7 @@ static void pending_bios_fn(struct btrfs_work *work)
 
				 }
			
 
				 
			
 
				 
			
 
				-void btrfs_free_stale_device(struct btrfs_device *cur_dev)
			
 
				+static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
			
 
				 {
			
 
				 	struct btrfs_fs_devices *fs_devs;
			
 
				 	struct btrfs_device *dev;
			
@@ -1068,14 +1052,15 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-void btrfs_release_disk_super(struct page *page)
			
 
				+static void btrfs_release_disk_super(struct page *page)
			
 
				 {
			
 
				 	kunmap(page);
			
 
				 	put_page(page);
			
 
				 }
			
 
				 
			
 
				-int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
			
 
				-		struct page **page, struct btrfs_super_block **disk_super)
			
 
				+static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
			
 
				+				 struct page **page,
			
 
				+				 struct btrfs_super_block **disk_super)
			
 
				 {
			
 
				 	void *p;
			
 
				 	pgoff_t index;
			
@@ -1817,8 +1802,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
			
 
				-					struct btrfs_device *device)
			
 
				+static struct btrfs_device * btrfs_find_next_active_device(
			
 
				+		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
			
 
				 {
			
 
				 	struct btrfs_device *next_device;
			
 
				 
			
@@ -2031,19 +2016,20 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 
				 	}
			
 
				 
			
 
				 	btrfs_close_bdev(srcdev);
			
 
				-
			
 
				 	call_rcu(&srcdev->rcu, free_device);
			
 
				 
			
 
				-	/*
			
 
				-	 * unless fs_devices is seed fs, num_devices shouldn't go
			
 
				-	 * zero
			
 
				-	 */
			
 
				-	BUG_ON(!fs_devices->num_devices && !fs_devices->seeding);
			
 
				-
			
 
				 	/* if this is no devs we rather delete the fs_devices */
			
 
				 	if (!fs_devices->num_devices) {
			
 
				 		struct btrfs_fs_devices *tmp_fs_devices;
			
 
				 
			
 
				+		/*
			
 
				+		 * On a mounted FS, num_devices can't be zero unless it's a
			
 
				+		 * seed. In case of a seed device being replaced, the replace
			
 
				+		 * target is added to the sprout FS, so there will be no more
			
 
				+		 * device left under the seed FS.
			
 
				+		 */
			
 
				+		ASSERT(fs_devices->seeding);
			
 
				+
			
 
				 		tmp_fs_devices = fs_info->fs_devices;
			
 
				 		while (tmp_fs_devices) {
			
 
				 			if (tmp_fs_devices->seed == fs_devices) {
			
@@ -2323,6 +2309,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 	u64 tmp;
			
 
				 	int seeding_dev = 0;
			
 
				 	int ret = 0;
			
 
				+	bool unlocked = false;
			
 
				 
			
 
				 	if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
			
 
				 		return -EROFS;
			
@@ -2399,7 +2386,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 	if (seeding_dev) {
			
 
				 		sb->s_flags &= ~MS_RDONLY;
			
 
				 		ret = btrfs_prepare_sprout(fs_info);
			
 
				-		BUG_ON(ret); /* -ENOMEM */
			
 
				+		if (ret) {
			
 
				+			btrfs_abort_transaction(trans, ret);
			
 
				+			goto error_trans;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	device->fs_devices = fs_info->fs_devices;
			
@@ -2445,14 +2435,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 		mutex_unlock(&fs_info->chunk_mutex);
			
 
				 		if (ret) {
			
 
				 			btrfs_abort_transaction(trans, ret);
			
 
				-			goto error_trans;
			
 
				+			goto error_sysfs;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	ret = btrfs_add_device(trans, fs_info, device);
			
 
				 	if (ret) {
			
 
				 		btrfs_abort_transaction(trans, ret);
			
 
				-		goto error_trans;
			
 
				+		goto error_sysfs;
			
 
				 	}
			
 
				 
			
 
				 	if (seeding_dev) {
			
@@ -2461,7 +2451,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 		ret = btrfs_finish_sprout(trans, fs_info);
			
 
				 		if (ret) {
			
 
				 			btrfs_abort_transaction(trans, ret);
			
 
				-			goto error_trans;
			
 
				+			goto error_sysfs;
			
 
				 		}
			
 
				 
			
 
				 		/* Sprouting would change fsid of the mounted root,
			
@@ -2479,6 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 	if (seeding_dev) {
			
 
				 		mutex_unlock(&uuid_mutex);
			
 
				 		up_write(&sb->s_umount);
			
 
				+		unlocked = true;
			
 
				 
			
 
				 		if (ret) /* transaction commit */
			
 
				 			return ret;
			
@@ -2491,7 +2482,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 		if (IS_ERR(trans)) {
			
 
				 			if (PTR_ERR(trans) == -ENOENT)
			
 
				 				return 0;
			
 
				-			return PTR_ERR(trans);
			
 
				+			ret = PTR_ERR(trans);
			
 
				+			trans = NULL;
			
 
				+			goto error_sysfs;
			
 
				 		}
			
 
				 		ret = btrfs_commit_transaction(trans);
			
 
				 	}
			
@@ -2500,14 +2493,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
				 	update_dev_time(device_path);
			
 
				 	return ret;
			
 
				 
			
 
				+error_sysfs:
			
 
				+	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
			
 
				 error_trans:
			
 
				-	btrfs_end_transaction(trans);
			
 
				+	if (seeding_dev)
			
 
				+		sb->s_flags |= MS_RDONLY;
			
 
				+	if (trans)
			
 
				+		btrfs_end_transaction(trans);
			
 
				 	rcu_string_free(device->name);
			
 
				-	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
			
 
				 	kfree(device);
			
 
				 error:
			
 
				 	blkdev_put(bdev, FMODE_EXCL);
			
 
				-	if (seeding_dev) {
			
 
				+	if (seeding_dev && !unlocked) {
			
 
				 		mutex_unlock(&uuid_mutex);
			
 
				 		up_write(&sb->s_umount);
			
 
				 	}
			
@@ -4813,16 +4810,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
				 	em_tree = &info->mapping_tree.map_tree;
			
 
				 	write_lock(&em_tree->lock);
			
 
				 	ret = add_extent_mapping(em_tree, em, 0);
			
 
				-	if (!ret) {
			
 
				-		list_add_tail(&em->list, &trans->transaction->pending_chunks);
			
 
				-		refcount_inc(&em->refs);
			
 
				-	}
			
 
				-	write_unlock(&em_tree->lock);
			
 
				 	if (ret) {
			
 
				+		write_unlock(&em_tree->lock);
			
 
				 		free_extent_map(em);
			
 
				 		goto error;
			
 
				 	}
			
 
				 
			
 
				+	list_add_tail(&em->list, &trans->transaction->pending_chunks);
			
 
				+	refcount_inc(&em->refs);
			
 
				+	write_unlock(&em_tree->lock);
			
 
				+
			
 
				 	ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
			
 
				 	if (ret)
			
 
				 		goto error_del_extent;
			
@@ -5695,10 +5692,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
				 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
			
 
				 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
			
 
				 				&stripe_index);
			
 
				-		if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
			
 
				+		if (!need_full_stripe(op))
			
 
				 			mirror_num = 1;
			
 
				 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
			
 
				-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
			
 
				+		if (need_full_stripe(op))
			
 
				 			num_stripes = map->num_stripes;
			
 
				 		else if (mirror_num)
			
 
				 			stripe_index = mirror_num - 1;
			
@@ -5711,7 +5708,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
				 		}
			
 
				 
			
 
				 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
			
 
				-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
			
 
				+		if (need_full_stripe(op)) {
			
 
				 			num_stripes = map->num_stripes;
			
 
				 		} else if (mirror_num) {
			
 
				 			stripe_index = mirror_num - 1;
			
@@ -5725,7 +5722,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
				 		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
			
 
				 		stripe_index *= map->sub_stripes;
			
 
				 
			
 
				-		if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
			
 
				+		if (need_full_stripe(op))
			
 
				 			num_stripes = map->sub_stripes;
			
 
				 		else if (mirror_num)
			
 
				 			stripe_index += mirror_num - 1;
			
@@ -5740,9 +5737,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
				 		}
			
 
				 
			
 
				 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
			
 
				-		if (need_raid_map &&
			
 
				-		    (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
			
 
				-		     mirror_num > 1)) {
			
 
				+		if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
			
 
				 			/* push stripe_nr back to the start of the full stripe */
			
 
				 			stripe_nr = div64_u64(raid56_full_stripe_start,
			
 
				 					stripe_len * nr_data_stripes(map));
			
@@ -5769,9 +5764,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
				 			/* We distribute the parity blocks across stripes */
			
 
				 			div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
			
 
				 					&stripe_index);
			
 
				-			if ((op != BTRFS_MAP_WRITE &&
			
 
				-			     op != BTRFS_MAP_GET_READ_MIRRORS) &&
			
 
				-			    mirror_num <= 1)
			
 
				+			if (!need_full_stripe(op) && mirror_num <= 1)
			
 
				 				mirror_num = 1;
			
 
				 		}
			
 
				 	} else {
			
@@ -6033,7 +6026,7 @@ static void btrfs_end_bio(struct bio *bio)
 
				 			 * this bio is actually up to date, we didn't
			
 
				 			 * go over the max number of errors
			
 
				 			 */
			
 
				-			bio->bi_status = 0;
			
 
				+			bio->bi_status = BLK_STS_OK;
			
 
				 		}
			
 
				 
			
 
				 		btrfs_end_bbio(bbio, bio);
			
@@ -6069,13 +6062,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device,
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * nr_async_bios allows us to reliably return congestion to the
			
 
				-	 * higher layers.  Otherwise, the async bio makes it appear we have
			
 
				-	 * made progress against dirty pages when we've really just put it
			
 
				-	 * on a queue for later
			
 
				-	 */
			
 
				-	atomic_inc(&fs_info->nr_async_bios);
			
 
				 	WARN_ON(bio->bi_next);
			
 
				 	bio->bi_next = NULL;
			
 
				 
			
@@ -6144,7 +6130,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
				 
			
 
				 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
			
 
				 		bio->bi_iter.bi_sector = logical >> 9;
			
 
				-		bio->bi_status = BLK_STS_IOERR;
			
 
				+		if (atomic_read(&bbio->error) > bbio->max_errors)
			
 
				+			bio->bi_status = BLK_STS_IOERR;
			
 
				+		else
			
 
				+			bio->bi_status = BLK_STS_OK;
			
 
				 		btrfs_end_bbio(bbio, bio);
			
 
				 	}
			
 
				 }
			
@@ -6249,7 +6238,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
 
				 
			
 
				 	device = btrfs_alloc_device(NULL, &devid, dev_uuid);
			
 
				 	if (IS_ERR(device))
			
 
				-		return NULL;
			
 
				+		return device;
			
 
				 
			
 
				 	list_add(&device->dev_list, &fs_devices->devices);
			
 
				 	device->fs_devices = fs_devices;
			
@@ -6377,6 +6366,17 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
			
 
				+					u64 devid, u8 *uuid, bool error)
			
 
				+{
			
 
				+	if (error)
			
 
				+		btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
			
 
				+			      devid, uuid);
			
 
				+	else
			
 
				+		btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
			
 
				+			      devid, uuid);
			
 
				+}
			
 
				+
			
 
				 static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
			
 
				 			  struct extent_buffer *leaf,
			
 
				 			  struct btrfs_chunk *chunk)
			
@@ -6447,18 +6447,21 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 
				 		if (!map->stripes[i].dev &&
			
 
				 		    !btrfs_test_opt(fs_info, DEGRADED)) {
			
 
				 			free_extent_map(em);
			
 
				-			btrfs_report_missing_device(fs_info, devid, uuid);
			
 
				-			return -EIO;
			
 
				+			btrfs_report_missing_device(fs_info, devid, uuid, true);
			
 
				+			return -ENOENT;
			
 
				 		}
			
 
				 		if (!map->stripes[i].dev) {
			
 
				 			map->stripes[i].dev =
			
 
				 				add_missing_dev(fs_info->fs_devices, devid,
			
 
				 						uuid);
			
 
				-			if (!map->stripes[i].dev) {
			
 
				+			if (IS_ERR(map->stripes[i].dev)) {
			
 
				 				free_extent_map(em);
			
 
				-				return -EIO;
			
 
				+				btrfs_err(fs_info,
			
 
				+					"failed to init missing dev %llu: %ld",
			
 
				+					devid, PTR_ERR(map->stripes[i].dev));
			
 
				+				return PTR_ERR(map->stripes[i].dev);
			
 
				 			}
			
 
				-			btrfs_report_missing_device(fs_info, devid, uuid);
			
 
				+			btrfs_report_missing_device(fs_info, devid, uuid, false);
			
 
				 		}
			
 
				 		map->stripes[i].dev->in_fs_metadata = 1;
			
 
				 	}
			
@@ -6577,19 +6580,28 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
 
				 	device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
			
 
				 	if (!device) {
			
 
				 		if (!btrfs_test_opt(fs_info, DEGRADED)) {
			
 
				-			btrfs_report_missing_device(fs_info, devid, dev_uuid);
			
 
				-			return -EIO;
			
 
				+			btrfs_report_missing_device(fs_info, devid,
			
 
				+							dev_uuid, true);
			
 
				+			return -ENOENT;
			
 
				 		}
			
 
				 
			
 
				 		device = add_missing_dev(fs_devices, devid, dev_uuid);
			
 
				-		if (!device)
			
 
				-			return -ENOMEM;
			
 
				-		btrfs_report_missing_device(fs_info, devid, dev_uuid);
			
 
				+		if (IS_ERR(device)) {
			
 
				+			btrfs_err(fs_info,
			
 
				+				"failed to add missing dev %llu: %ld",
			
 
				+				devid, PTR_ERR(device));
			
 
				+			return PTR_ERR(device);
			
 
				+		}
			
 
				+		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
			
 
				 	} else {
			
 
				 		if (!device->bdev) {
			
 
				-			btrfs_report_missing_device(fs_info, devid, dev_uuid);
			
 
				-			if (!btrfs_test_opt(fs_info, DEGRADED))
			
 
				-				return -EIO;
			
 
				+			if (!btrfs_test_opt(fs_info, DEGRADED)) {
			
 
				+				btrfs_report_missing_device(fs_info,
			
 
				+						devid, dev_uuid, true);
			
 
				+				return -ENOENT;
			
 
				+			}
			
 
				+			btrfs_report_missing_device(fs_info, devid,
			
 
				+							dev_uuid, false);
			
 
				 		}
			
 
				 
			
 
				 		if(!device->bdev && !device->missing) {
			
@@ -6756,12 +6768,6 @@ out_short_read:
 
				 	return -EIO;
			
 
				 }
			
 
				 
			
 
				-void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
			
 
				-				 u8 *uuid)
			
 
				-{
			
 
				-	btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid);
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Check if all chunks in the fs are OK for read-write degraded mount
			
 
				  *
			
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -542,7 +542,5 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 
				 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
			
 
				 
			
 
				 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info);
			
 
				-void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
			
 
				-				 u8 *uuid);
			
 
				 
			
 
				 #endif
			
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -37,6 +37,7 @@ struct workspace {
 
				 	z_stream strm;
			
 
				 	char *buf;
			
 
				 	struct list_head list;
			
 
				+	int level;
			
 
				 };
			
 
				 
			
 
				 static void zlib_free_workspace(struct list_head *ws)
			
@@ -96,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
 
				 	*total_out = 0;
			
 
				 	*total_in = 0;
			
 
				 
			
 
				-	if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
			
 
				+	if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
			
 
				 		pr_warn("BTRFS: deflateInit failed\n");
			
 
				 		ret = -EIO;
			
 
				 		goto out;
			
@@ -402,10 +403,22 @@ next:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static void zlib_set_level(struct list_head *ws, unsigned int type)
			
 
				+{
			
 
				+	struct workspace *workspace = list_entry(ws, struct workspace, list);
			
 
				+	unsigned level = (type & 0xF0) >> 4;
			
 
				+
			
 
				+	if (level > 9)
			
 
				+		level = 9;
			
 
				+
			
 
				+	workspace->level = level > 0 ? level : 3;
			
 
				+}
			
 
				+
			
 
				 const struct btrfs_compress_op btrfs_zlib_compress = {
			
 
				 	.alloc_workspace	= zlib_alloc_workspace,
			
 
				 	.free_workspace		= zlib_free_workspace,
			
 
				 	.compress_pages		= zlib_compress_pages,
			
 
				 	.decompress_bio		= zlib_decompress_bio,
			
 
				 	.decompress		= zlib_decompress,
			
 
				+	.set_level              = zlib_set_level,
			
 
				 };
			
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -423,10 +423,15 @@ finish:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static void zstd_set_level(struct list_head *ws, unsigned int type)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				 const struct btrfs_compress_op btrfs_zstd_compress = {
			
 
				 	.alloc_workspace = zstd_alloc_workspace,
			
 
				 	.free_workspace = zstd_free_workspace,
			
 
				 	.compress_pages = zstd_compress_pages,
			
 
				 	.decompress_bio = zstd_decompress_bio,
			
 
				 	.decompress = zstd_decompress,
			
 
				+	.set_level = zstd_set_level,
			
 
				 };
			
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -29,6 +29,13 @@ struct btrfs_qgroup_extent_record;
 
				 struct btrfs_qgroup;
			
 
				 struct prelim_ref;
			
 
				 
			
 
				+TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
			
 
				+TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
			
 
				+TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
			
 
				+TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
			
 
				+TRACE_DEFINE_ENUM(ALLOC_CHUNK);
			
 
				+TRACE_DEFINE_ENUM(COMMIT_TRANS);
			
 
				+
			
 
				 #define show_ref_type(type)						\
			
 
				 	__print_symbolic(type,						\
			
 
				 		{ BTRFS_TREE_BLOCK_REF_KEY, 	"TREE_BLOCK_REF" },	\
			
@@ -792,11 +799,10 @@ DEFINE_EVENT(btrfs_delayed_data_ref,  run_delayed_data_ref,
 
				 DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
			
 
				 
			
 
				 	TP_PROTO(const struct btrfs_fs_info *fs_info,
			
 
				-		 const struct btrfs_delayed_ref_node *ref,
			
 
				 		 const struct btrfs_delayed_ref_head *head_ref,
			
 
				 		 int action),
			
 
				 
			
 
				-	TP_ARGS(fs_info, ref, head_ref, action),
			
 
				+	TP_ARGS(fs_info, head_ref, action),
			
 
				 
			
 
				 	TP_STRUCT__entry_btrfs(
			
 
				 		__field(	u64,  bytenr		)
			
@@ -806,8 +812,8 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 
				 	),
			
 
				 
			
 
				 	TP_fast_assign_btrfs(fs_info,
			
 
				-		__entry->bytenr		= ref->bytenr;
			
 
				-		__entry->num_bytes	= ref->num_bytes;
			
 
				+		__entry->bytenr		= head_ref->bytenr;
			
 
				+		__entry->num_bytes	= head_ref->num_bytes;
			
 
				 		__entry->action		= action;
			
 
				 		__entry->is_data	= head_ref->is_data;
			
 
				 	),
			
@@ -822,21 +828,19 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 
				 DEFINE_EVENT(btrfs_delayed_ref_head,  add_delayed_ref_head,
			
 
				 
			
 
				 	TP_PROTO(const struct btrfs_fs_info *fs_info,
			
 
				-		 const struct btrfs_delayed_ref_node *ref,
			
 
				 		 const struct btrfs_delayed_ref_head *head_ref,
			
 
				 		 int action),
			
 
				 
			
 
				-	TP_ARGS(fs_info, ref, head_ref, action)
			
 
				+	TP_ARGS(fs_info, head_ref, action)
			
 
				 );
			
 
				 
			
 
				 DEFINE_EVENT(btrfs_delayed_ref_head,  run_delayed_ref_head,
			
 
				 
			
 
				 	TP_PROTO(const struct btrfs_fs_info *fs_info,
			
 
				-		 const struct btrfs_delayed_ref_node *ref,
			
 
				 		 const struct btrfs_delayed_ref_head *head_ref,
			
 
				 		 int action),
			
 
				 
			
 
				-	TP_ARGS(fs_info, ref, head_ref, action)
			
 
				+	TP_ARGS(fs_info, head_ref, action)
			
 
				 );
			
 
				 
			
 
				 #define show_chunk_type(type)					\
			
@@ -1692,6 +1696,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
 
				 	TP_ARGS(fs_info, oldref, newref, tree_size)
			
 
				 );
			
 
				 
			
 
				+TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
			
 
				+	TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
			
 
				+
			
 
				+	TP_ARGS(root, ino, mod),
			
 
				+
			
 
				+	TP_STRUCT__entry_btrfs(
			
 
				+		__field(	u64, root_objectid	)
			
 
				+		__field(	u64, ino		)
			
 
				+		__field(	int, mod		)
			
 
				+	),
			
 
				+
			
 
				+	TP_fast_assign_btrfs(root->fs_info,
			
 
				+		__entry->root_objectid	= root->objectid;
			
 
				+		__entry->ino		= ino;
			
 
				+		__entry->mod		= mod;
			
 
				+	),
			
 
				+
			
 
				+	TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
			
 
				+			show_root_type(__entry->root_objectid),
			
 
				+			(unsigned long long)__entry->ino, __entry->mod)
			
 
				+);
			
 
				 #endif /* _TRACE_BTRFS_H */
			
 
				 
			
 
				 /* This part must be outside protection */
			
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -609,10 +609,14 @@ struct btrfs_ioctl_ino_path_args {
 
				 struct btrfs_ioctl_logical_ino_args {
			
 
				 	__u64				logical;	/* in */
			
 
				 	__u64				size;		/* in */
			
 
				-	__u64				reserved[4];
			
 
				+	__u64				reserved[3];	/* must be 0 for now */
			
 
				+	__u64				flags;		/* in, v2 only */
			
 
				 	/* struct btrfs_data_container	*inodes;	out   */
			
 
				 	__u64				inodes;
			
 
				 };
			
 
				+/* Return every ref to the extent, not just those containing logical block.
			
 
				+ * Requires logical == extent bytenr. */
			
 
				+#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET	(1ULL << 0)
			
 
				 
			
 
				 enum btrfs_dev_stat_values {
			
 
				 	/* disk I/O failure stats */
			
@@ -836,5 +840,7 @@ enum btrfs_err_code {
 
				 				   struct btrfs_ioctl_feature_flags[3])
			
 
				 #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
			
 
				 				   struct btrfs_ioctl_vol_args_v2)
			
 
				+#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
			
 
				+					struct btrfs_ioctl_logical_ino_args)
			
 
				 
			
 
				 #endif /* _UAPI_LINUX_BTRFS_H */
			
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -733,6 +733,7 @@ struct btrfs_balance_item {
 
				 #define BTRFS_FILE_EXTENT_INLINE 0
			
 
				 #define BTRFS_FILE_EXTENT_REG 1
			
 
				 #define BTRFS_FILE_EXTENT_PREALLOC 2
			
 
				+#define BTRFS_FILE_EXTENT_TYPES	2
			
 
				 
			
 
				 struct btrfs_file_extent_item {
			
 
				 	/*