@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, int reserved);
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                   struct btrfs_space_info *space_info,
+                                   u64 orig_bytes,
+                                   enum btrfs_reserve_flush_enum flush);
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes);
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags)
 
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                              u64 total_bytes, u64 bytes_used,
+                             u64 bytes_readonly,
                              struct btrfs_space_info **space_info)
 {
        struct btrfs_space_info *found;
@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
+               found->bytes_readonly += bytes_readonly;
                if (total_bytes > 0)
                        found->full = 0;
+               space_info_add_new_bytes(info, found, total_bytes -
+                                        bytes_used - bytes_readonly);
                spin_unlock(&found->lock);
                *space_info = found;
                return 0;
@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->disk_used = bytes_used * factor;
        found->bytes_pinned = 0;
        found->bytes_reserved = 0;
-       found->bytes_readonly = 0;
+       found->bytes_readonly = bytes_readonly;
        found->bytes_may_use = 0;
        found->full = 0;
        found->max_extent_size = 0;
@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->flush = 0;
        init_waitqueue_head(&found->wait);
        INIT_LIST_HEAD(&found->ro_bgs);
+       INIT_LIST_HEAD(&found->tickets);
+       INIT_LIST_HEAD(&found->priority_tickets);
 
        ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
                                   info->space_info_kobj, "%s",
@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        space_info = __find_space_info(extent_root->fs_info, flags);
        if (!space_info) {
                ret = update_space_info(extent_root->fs_info, flags,
-                                       0, 0, &space_info);
+                                       0, 0, 0, &space_info);
                BUG_ON(ret); /* -ENOMEM */
        }
        BUG_ON(!space_info); /* Logic error */
@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
                          struct btrfs_space_info *space_info, u64 bytes,
                          enum btrfs_reserve_flush_enum flush)
 {
-       struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
-       u64 profile = btrfs_get_alloc_profile(root, 0);
+       struct btrfs_block_rsv *global_rsv;
+       u64 profile;
        u64 space_size;
        u64 avail;
        u64 used;
 
+       /* Don't overcommit when in mixed mode. */
+       if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+               return 0;
+
+       BUG_ON(root->fs_info == NULL);
+       global_rsv = &root->fs_info->global_block_rsv;
+       profile = btrfs_get_alloc_profile(root, 0);
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly;
 
@@ -4739,6 +4762,11 @@ skip_async:
                        spin_unlock(&space_info->lock);
                        break;
                }
+               if (list_empty(&space_info->tickets) &&
+                   list_empty(&space_info->priority_tickets)) {
+                       spin_unlock(&space_info->lock);
+                       break;
+               }
                spin_unlock(&space_info->lock);
 
                loops++;
@@ -4807,13 +4835,11 @@ commit:
        return btrfs_commit_transaction(trans, root);
 }
 
-enum flush_state {
-       FLUSH_DELAYED_ITEMS_NR = 1,
-       FLUSH_DELAYED_ITEMS = 2,
-       FLUSH_DELALLOC = 3,
-       FLUSH_DELALLOC_WAIT = 4,
-       ALLOC_CHUNK = 5,
-       COMMIT_TRANS = 6,
+struct reserve_ticket {
+       u64 bytes;
+       int error;
+       struct list_head list;
+       wait_queue_head_t wait;
 };
 
 static int flush_space(struct btrfs_root *root,
@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root,
                break;
        }
 
+       trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
+                               orig_bytes, state, ret);
        return ret;
 }
 
@@ -4873,17 +4901,22 @@ static inline u64
 btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                                 struct btrfs_space_info *space_info)
 {
+       struct reserve_ticket *ticket;
        u64 used;
        u64 expected;
-       u64 to_reclaim;
+       u64 to_reclaim = 0;
 
        to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-       spin_lock(&space_info->lock);
        if (can_overcommit(root, space_info, to_reclaim,
-                          BTRFS_RESERVE_FLUSH_ALL)) {
-               to_reclaim = 0;
-               goto out;
-       }
+                          BTRFS_RESERVE_FLUSH_ALL))
+               return 0;
+
+       list_for_each_entry(ticket, &space_info->tickets, list)
+               to_reclaim += ticket->bytes;
+       list_for_each_entry(ticket, &space_info->priority_tickets, list)
+               to_reclaim += ticket->bytes;
+       if (to_reclaim)
+               return to_reclaim;
 
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly +
@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                to_reclaim = 0;
        to_reclaim = min(to_reclaim, space_info->bytes_may_use +
                                     space_info->bytes_reserved);
-out:
-       spin_unlock(&space_info->lock);
-
        return to_reclaim;
 }
 
 static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
-                                       struct btrfs_fs_info *fs_info, u64 used)
+                                       struct btrfs_root *root, u64 used)
 {
        u64 thresh = div_factor_fine(space_info->total_bytes, 98);
 
@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
        if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
                return 0;
 
-       return (used >= thresh && !btrfs_fs_closing(fs_info) &&
-               !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+       if (!btrfs_calc_reclaim_metadata_size(root, space_info))
+               return 0;
+
+       return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
+               !test_bit(BTRFS_FS_STATE_REMOUNTING,
+                         &root->fs_info->fs_state));
 }
 
-static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
-                                      struct btrfs_fs_info *fs_info,
-                                      int flush_state)
+static void wake_all_tickets(struct list_head *head)
 {
-       u64 used;
-
-       spin_lock(&space_info->lock);
-       /*
-        * We run out of space and have not got any free space via flush_space,
-        * so don't bother doing async reclaim.
-        */
-       if (flush_state > COMMIT_TRANS && space_info->full) {
-               spin_unlock(&space_info->lock);
-               return 0;
-       }
+       struct reserve_ticket *ticket;
 
-       used = space_info->bytes_used + space_info->bytes_reserved +
-              space_info->bytes_pinned + space_info->bytes_readonly +
-              space_info->bytes_may_use;
-       if (need_do_async_reclaim(space_info, fs_info, used)) {
-               spin_unlock(&space_info->lock);
-               return 1;
+       while (!list_empty(head)) {
+               ticket = list_first_entry(head, struct reserve_ticket, list);
+               list_del_init(&ticket->list);
+               ticket->error = -ENOSPC;
+               wake_up(&ticket->wait);
        }
-       spin_unlock(&space_info->lock);
-
-       return 0;
 }
 
+/*
+ * This is for normal flushers, we can wait all goddamned day if we want to. We
+ * will loop and continuously try to flush as long as we are making progress.
+ * We count progress as clearing off tickets each time we have to loop.
+ */
 static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 {
+       struct reserve_ticket *last_ticket = NULL;
        struct btrfs_fs_info *fs_info;
        struct btrfs_space_info *space_info;
        u64 to_reclaim;
        int flush_state;
+       int commit_cycles = 0;
 
        fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
        space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 
+       spin_lock(&space_info->lock);
        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
                                                      space_info);
-       if (!to_reclaim)
+       if (!to_reclaim) {
+               space_info->flush = 0;
+               spin_unlock(&space_info->lock);
                return;
+       }
+       last_ticket = list_first_entry(&space_info->tickets,
+                                      struct reserve_ticket, list);
+       spin_unlock(&space_info->lock);
 
        flush_state = FLUSH_DELAYED_ITEMS_NR;
+       do {
+               struct reserve_ticket *ticket;
+               int ret;
+
+               ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
+                                 to_reclaim, flush_state);
+               spin_lock(&space_info->lock);
+               if (list_empty(&space_info->tickets)) {
+                       space_info->flush = 0;
+                       spin_unlock(&space_info->lock);
+                       return;
+               }
+               to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                             space_info);
+               ticket = list_first_entry(&space_info->tickets,
+                                         struct reserve_ticket, list);
+               if (last_ticket == ticket) {
+                       flush_state++;
+               } else {
+                       last_ticket = ticket;
+                       flush_state = FLUSH_DELAYED_ITEMS_NR;
+                       if (commit_cycles)
+                               commit_cycles--;
+               }
+
+               if (flush_state > COMMIT_TRANS) {
+                       commit_cycles++;
+                       if (commit_cycles > 2) {
+                               wake_all_tickets(&space_info->tickets);
+                               space_info->flush = 0;
+                       } else {
+                               flush_state = FLUSH_DELAYED_ITEMS_NR;
+                       }
+               }
+               spin_unlock(&space_info->lock);
+       } while (flush_state <= COMMIT_TRANS);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+       INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+                                           struct btrfs_space_info *space_info,
+                                           struct reserve_ticket *ticket)
+{
+       u64 to_reclaim;
+       int flush_state = FLUSH_DELAYED_ITEMS_NR;
+
+       spin_lock(&space_info->lock);
+       to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                     space_info);
+       if (!to_reclaim) {
+               spin_unlock(&space_info->lock);
+               return;
+       }
+       spin_unlock(&space_info->lock);
+
        do {
                flush_space(fs_info->fs_root, space_info, to_reclaim,
                            to_reclaim, flush_state);
                flush_state++;
-               if (!btrfs_need_do_async_reclaim(space_info, fs_info,
-                                                flush_state))
+               spin_lock(&space_info->lock);
+               if (ticket->bytes == 0) {
+                       spin_unlock(&space_info->lock);
                        return;
+               }
+               spin_unlock(&space_info->lock);
+
+               /*
+                * Priority flushers can't wait on delalloc without
+                * deadlocking.
+                */
+               if (flush_state == FLUSH_DELALLOC ||
+                   flush_state == FLUSH_DELALLOC_WAIT)
+                       flush_state = ALLOC_CHUNK;
        } while (flush_state < COMMIT_TRANS);
 }
 
-void btrfs_init_async_reclaim_work(struct work_struct *work)
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+                              struct btrfs_space_info *space_info,
+                              struct reserve_ticket *ticket, u64 orig_bytes)
+
 {
-       INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+       DEFINE_WAIT(wait);
+       int ret = 0;
+
+       spin_lock(&space_info->lock);
+       while (ticket->bytes > 0 && ticket->error == 0) {
+               ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+               if (ret) {
+                       ret = -EINTR;
+                       break;
+               }
+               spin_unlock(&space_info->lock);
+
+               schedule();
+
+               finish_wait(&ticket->wait, &wait);
+               spin_lock(&space_info->lock);
+       }
+       if (!ret)
+               ret = ticket->error;
+       if (!list_empty(&ticket->list))
+               list_del_init(&ticket->list);
+       if (ticket->bytes && ticket->bytes < orig_bytes) {
+               u64 num_bytes = orig_bytes - ticket->bytes;
+               space_info->bytes_may_use -= num_bytes;
+               trace_btrfs_space_reservation(fs_info, "space_info",
+                                             space_info->flags, num_bytes, 0);
+       }
+       spin_unlock(&space_info->lock);
+
+       return ret;
 }
 
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
+ * @space_info - the space info we want to allocate from
  * @orig_bytes - the number of bytes we want
  * @flush - whether or not we can flush to make our reservation
  *
@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
  * regain reservations will be made and this will fail if there is not enough
  * space already.
  */
-static int reserve_metadata_bytes(struct btrfs_root *root,
-                                 struct btrfs_block_rsv *block_rsv,
-                                 u64 orig_bytes,
-                                 enum btrfs_reserve_flush_enum flush)
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                   struct btrfs_space_info *space_info,
+                                   u64 orig_bytes,
+                                   enum btrfs_reserve_flush_enum flush)
 {
-       struct btrfs_space_info *space_info = block_rsv->space_info;
+       struct reserve_ticket ticket;
        u64 used;
-       u64 num_bytes = orig_bytes;
-       int flush_state = FLUSH_DELAYED_ITEMS_NR;
        int ret = 0;
-       bool flushing = false;
 
-again:
-       ret = 0;
-       spin_lock(&space_info->lock);
-       /*
-        * We only want to wait if somebody other than us is flushing and we
-        * are actually allowed to flush all things.
-        */
-       while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
-              space_info->flush) {
-               spin_unlock(&space_info->lock);
-               /*
-                * If we have a trans handle we can't wait because the flusher
-                * may have to commit the transaction, which would mean we would
-                * deadlock since we are waiting for the flusher to finish, but
-                * hold the current transaction open.
-                */
-               if (current->journal_info)
-                       return -EAGAIN;
-               ret = wait_event_killable(space_info->wait, !space_info->flush);
-               /* Must have been killed, return */
-               if (ret)
-                       return -EINTR;
-
-               spin_lock(&space_info->lock);
-       }
+       ASSERT(orig_bytes);
+       ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
 
+       spin_lock(&space_info->lock);
        ret = -ENOSPC;
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly +
               space_info->bytes_may_use;
 
        /*
-        * The idea here is that we've not already over-reserved the block group
-        * then we can go ahead and save our reservation first and then start
-        * flushing if we need to. Otherwise if we've already overcommitted
-        * lets start flushing stuff first and then come back and try to make
-        * our reservation.
+        * If we have enough space then hooray, make our reservation and carry
+        * on. If not see if we can overcommit, and if we can, hooray carry on.
+        * If not things get more complicated.
         */
-       if (used <= space_info->total_bytes) {
-               if (used + orig_bytes <= space_info->total_bytes) {
-                       space_info->bytes_may_use += orig_bytes;
-                       trace_btrfs_space_reservation(root->fs_info,
-                               "space_info", space_info->flags, orig_bytes, 1);
-                       ret = 0;
-               } else {
-                       /*
-                        * Ok set num_bytes to orig_bytes since we aren't
-                        * overocmmitted, this way we only try and reclaim what
-                        * we need.
-                        */
-                       num_bytes = orig_bytes;
-               }
-       } else {
-               /*
-                * Ok we're over committed, set num_bytes to the overcommitted
-                * amount plus the amount of bytes that we need for this
-                * reservation.
-                */
-               num_bytes = used - space_info->total_bytes +
-                       (orig_bytes * 2);
-       }
-
-       if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
+       if (used + orig_bytes <= space_info->total_bytes) {
+               space_info->bytes_may_use += orig_bytes;
+               trace_btrfs_space_reservation(root->fs_info, "space_info",
+                                             space_info->flags, orig_bytes,
+                                             1);
+               ret = 0;
+       } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
                space_info->bytes_may_use += orig_bytes;
                trace_btrfs_space_reservation(root->fs_info, "space_info",
                                              space_info->flags, orig_bytes,
@@ -5074,16 +5163,31 @@ again:
        }
 
        /*
-        * Couldn't make our reservation, save our place so while we're trying
-        * to reclaim space we can actually use it instead of somebody else
-        * stealing it from us.
+        * If we couldn't make a reservation then setup our reservation ticket
+        * and kick the async worker if it's not already running.
         *
-        * We make the other tasks wait for the flush only when we can flush
-        * all things.
+        * If we are a priority flusher then we just need to add our ticket to
+        * the list and we will do our own flushing further down.
         */
        if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
-               flushing = true;
-               space_info->flush = 1;
+               ticket.bytes = orig_bytes;
+               ticket.error = 0;
+               init_waitqueue_head(&ticket.wait);
+               if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+                       list_add_tail(&ticket.list, &space_info->tickets);
+                       if (!space_info->flush) {
+                               space_info->flush = 1;
+                               trace_btrfs_trigger_flush(root->fs_info,
+                                                         space_info->flags,
+                                                         orig_bytes, flush,
+                                                         "enospc");
+                               queue_work(system_unbound_wq,
+                                          &root->fs_info->async_reclaim_work);
+                       }
+               } else {
+                       list_add_tail(&ticket.list,
+                                     &space_info->priority_tickets);
+               }
        } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
                used += orig_bytes;
                /*
@@ -5092,39 +5196,67 @@ again:
                 * the async reclaim as we will panic.
                 */
                if (!root->fs_info->log_root_recovering &&
-                   need_do_async_reclaim(space_info, root->fs_info, used) &&
-                   !work_busy(&root->fs_info->async_reclaim_work))
+                   need_do_async_reclaim(space_info, root, used) &&
+                   !work_busy(&root->fs_info->async_reclaim_work)) {
+                       trace_btrfs_trigger_flush(root->fs_info,
+                                                 space_info->flags,
+                                                 orig_bytes, flush,
+                                                 "preempt");
                        queue_work(system_unbound_wq,
                                   &root->fs_info->async_reclaim_work);
+               }
        }
        spin_unlock(&space_info->lock);
-
        if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
-               goto out;
+               return ret;
 
-       ret = flush_space(root, space_info, num_bytes, orig_bytes,
-                         flush_state);
-       flush_state++;
+       if (flush == BTRFS_RESERVE_FLUSH_ALL)
+               return wait_reserve_ticket(root->fs_info, space_info, &ticket,
+                                          orig_bytes);
 
-       /*
-        * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
-        * would happen. So skip delalloc flush.
-        */
-       if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-           (flush_state == FLUSH_DELALLOC ||
-            flush_state == FLUSH_DELALLOC_WAIT))
-               flush_state = ALLOC_CHUNK;
+       ret = 0;
+       priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
+       spin_lock(&space_info->lock);
+       if (ticket.bytes) {
+               if (ticket.bytes < orig_bytes) {
+                       u64 num_bytes = orig_bytes - ticket.bytes;
+                       space_info->bytes_may_use -= num_bytes;
+                       trace_btrfs_space_reservation(root->fs_info,
+                                       "space_info", space_info->flags,
+                                       num_bytes, 0);
 
-       if (!ret)
-               goto again;
-       else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-                flush_state < COMMIT_TRANS)
-               goto again;
-       else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
-                flush_state <= COMMIT_TRANS)
-               goto again;
+               }
+               list_del_init(&ticket.list);
+               ret = -ENOSPC;
+       }
+       spin_unlock(&space_info->lock);
+       ASSERT(list_empty(&ticket.list));
+       return ret;
+}
 
-out:
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orgi_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int reserve_metadata_bytes(struct btrfs_root *root,
+                                 struct btrfs_block_rsv *block_rsv,
+                                 u64 orig_bytes,
+                                 enum btrfs_reserve_flush_enum flush)
+{
+       int ret;
+
+       ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
+                                      flush);
        if (ret == -ENOSPC &&
            unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
                struct btrfs_block_rsv *global_rsv =
@@ -5137,13 +5269,8 @@ out:
        if (ret == -ENOSPC)
                trace_btrfs_space_reservation(root->fs_info,
                                              "space_info:enospc",
-                                             space_info->flags, orig_bytes, 1);
-       if (flushing) {
-               spin_lock(&space_info->lock);
-               space_info->flush = 0;
-               wake_up_all(&space_info->wait);
-               spin_unlock(&space_info->lock);
-       }
+                                             block_rsv->space_info->flags,
+                                             orig_bytes, 1);
        return ret;
 }
 
@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+/*
+ * This is for space we already have accounted in space_info->bytes_may_use, so
+ * basically when we're returning space from block_rsv's.
+ */
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes)
+{
+       struct reserve_ticket *ticket;
+       struct list_head *head;
+       u64 used;
+       enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+       bool check_overcommit = false;
+
+       spin_lock(&space_info->lock);
+       head = &space_info->priority_tickets;
+
+       /*
+        * If we are over our limit then we need to check and see if we can
+        * overcommit, and if we can't then we just need to free up our space
+        * and not satisfy any requests.
+        */
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_pinned + space_info->bytes_readonly +
+              space_info->bytes_may_use;
+       if (used - num_bytes >= space_info->total_bytes)
+               check_overcommit = true;
+again:
+       while (!list_empty(head) && num_bytes) {
+               ticket = list_first_entry(head, struct reserve_ticket,
+                                         list);
+               /*
+                * We use 0 bytes because this space is already reserved, so
+                * adding the ticket space would be a double count.
+                */
+               if (check_overcommit &&
+                   !can_overcommit(fs_info->extent_root, space_info, 0,
+                                   flush))
+                       break;
+               if (num_bytes >= ticket->bytes) {
+                       list_del_init(&ticket->list);
+                       num_bytes -= ticket->bytes;
+                       ticket->bytes = 0;
+                       wake_up(&ticket->wait);
+               } else {
+                       ticket->bytes -= num_bytes;
+                       num_bytes = 0;
+               }
+       }
+
+       if (num_bytes && head == &space_info->priority_tickets) {
+               head = &space_info->tickets;
+               flush = BTRFS_RESERVE_FLUSH_ALL;
+               goto again;
+       }
+       space_info->bytes_may_use -= num_bytes;
+       trace_btrfs_space_reservation(fs_info, "space_info",
+                                     space_info->flags, num_bytes, 0);
+       spin_unlock(&space_info->lock);
+}
+
+/*
+ * This is for newly allocated space that isn't accounted in
+ * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
+ * we use this helper.
+ */
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes)
+{
+       struct reserve_ticket *ticket;
+       struct list_head *head = &space_info->priority_tickets;
+
+again:
+       while (!list_empty(head) && num_bytes) {
+               ticket = list_first_entry(head, struct reserve_ticket,
+                                         list);
+               if (num_bytes >= ticket->bytes) {
+                       trace_btrfs_space_reservation(fs_info, "space_info",
+                                                     space_info->flags,
+                                                     ticket->bytes, 1);
+                       list_del_init(&ticket->list);
+                       num_bytes -= ticket->bytes;
+                       space_info->bytes_may_use += ticket->bytes;
+                       ticket->bytes = 0;
+                       wake_up(&ticket->wait);
+               } else {
+                       trace_btrfs_space_reservation(fs_info, "space_info",
+                                                     space_info->flags,
+                                                     num_bytes, 1);
+                       space_info->bytes_may_use += num_bytes;
+                       ticket->bytes -= num_bytes;
+                       num_bytes = 0;
+               }
+       }
+
+       if (num_bytes && head == &space_info->priority_tickets) {
+               head = &space_info->tickets;
+               goto again;
+       }
+}
+
 static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
                                    struct btrfs_block_rsv *dest, u64 num_bytes)
@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        }
                        spin_unlock(&dest->lock);
                }
-               if (num_bytes) {
-                       spin_lock(&space_info->lock);
-                       space_info->bytes_may_use -= num_bytes;
-                       trace_btrfs_space_reservation(fs_info, "space_info",
-                                       space_info->flags, num_bytes, 0);
-                       spin_unlock(&space_info->lock);
-               }
+               if (num_bytes)
+                       space_info_add_old_bytes(fs_info, space_info,
+                                                num_bytes);
        }
 }
 
-static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
-                                  struct btrfs_block_rsv *dst, u64 num_bytes)
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
+                           struct btrfs_block_rsv *dst, u64 num_bytes,
+                           int update_size)
 {
        int ret;
 
@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
        if (ret)
                return ret;
 
-       block_rsv_add_bytes(dst, num_bytes, 1);
+       block_rsv_add_bytes(dst, num_bytes, update_size);
        return 0;
 }
 
@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
-                           struct btrfs_block_rsv *dst_rsv,
-                           u64 num_bytes)
-{
-       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
 void btrfs_block_rsv_release(struct btrfs_root *root,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes)
@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
                                num_bytes);
 }
 
-/*
- * helper to calculate size of global block reservation.
- * the desired value is sum of space used by extent tree,
- * checksum tree and root tree
- */
-static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_space_info *sinfo;
-       u64 num_bytes;
-       u64 meta_used;
-       u64 data_used;
-       int csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
-       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
-       spin_lock(&sinfo->lock);
-       data_used = sinfo->bytes_used;
-       spin_unlock(&sinfo->lock);
-
-       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-       spin_lock(&sinfo->lock);
-       if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
-               data_used = 0;
-       meta_used = sinfo->bytes_used;
-       spin_unlock(&sinfo->lock);
-
-       num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
-                   csum_size * 2;
-       num_bytes += div_u64(data_used + meta_used, 50);
-
-       if (num_bytes * 3 > meta_used)
-               num_bytes = div_u64(meta_used, 3);
-
-       return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
-}
-
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
        struct btrfs_space_info *sinfo = block_rsv->space_info;
        u64 num_bytes;
 
-       num_bytes = calc_global_metadata_size(fs_info);
+       /*
+        * The global block rsv is based on the size of the extent tree, the
+        * checksum tree and the root tree. If the fs is empty we want to set
+        * it to a minimal amount for safety.
+        */
+       num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
+               btrfs_root_used(&fs_info->csum_root->root_item) +
+               btrfs_root_used(&fs_info->tree_root->root_item);
+       num_bytes = max_t(u64, num_bytes, SZ_16M);
 
        spin_lock(&sinfo->lock);
        spin_lock(&block_rsv->lock);
@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
+       /*
+        * We always use trans->block_rsv here as we will have reserved space
+        * for our orphan when starting the transaction, using get_block_rsv()
+        * here will sometimes make us choose the wrong block rsv as we could be
+        * doing a reloc inode for a non refcounted root.
+        */
+       struct btrfs_block_rsv *src_rsv = trans->block_rsv;
        struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
 
        /*
@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
        u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        trace_btrfs_space_reservation(root->fs_info, "orphan",
                                      btrfs_ino(inode), num_bytes, 1);
-       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+       return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
 
 void btrfs_orphan_release_metadata(struct inode *inode)
@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                  BTRFS_RESERVE_FLUSH_ALL);
 
        if (ret == -ENOSPC && use_global_rsv)
-               ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
+               ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
        if (ret && *qgroup_reserved)
                btrfs_qgroup_free_meta(root, *qgroup_reserved);
@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        u64 to_reserve = 0;
        u64 csum_bytes;
        unsigned nr_extents = 0;
-       int extra_reserve = 0;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
        bool delalloc_lock = true;
        u64 to_free = 0;
        unsigned dropped;
+       bool release_extra = false;
 
        /* If we are a free space inode we need to not flush since we will be in
         * the middle of a transaction commit. We also don't need the delalloc
         * mutex since we won't race with anybody. We need this mostly to make
         * lockdep shut its filthy mouth.
+        *
+        * If we have a transaction open (can happen if we call truncate_block
+        * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
         */
        if (btrfs_is_free_space_inode(inode)) {
                flush = BTRFS_RESERVE_NO_FLUSH;
                delalloc_lock = false;
+       } else if (current->journal_info) {
+               flush = BTRFS_RESERVE_FLUSH_LIMIT;
        }
 
        if (flush != BTRFS_RESERVE_NO_FLUSH &&
@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                                           BTRFS_MAX_EXTENT_SIZE - 1,
                                           BTRFS_MAX_EXTENT_SIZE);
        BTRFS_I(inode)->outstanding_extents += nr_extents;
-       nr_extents = 0;
 
+       nr_extents = 0;
        if (BTRFS_I(inode)->outstanding_extents >
            BTRFS_I(inode)->reserved_extents)
-               nr_extents = BTRFS_I(inode)->outstanding_extents -
+               nr_extents += BTRFS_I(inode)->outstanding_extents -
                        BTRFS_I(inode)->reserved_extents;
 
-       /*
-        * Add an item to reserve for updating the inode when we complete the
-        * delalloc io.
-        */
-       if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                     &BTRFS_I(inode)->runtime_flags)) {
-               nr_extents++;
-               extra_reserve = 1;
-       }
-
-       to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+       /* We always want to reserve a slot for updating the inode. */
+       to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
        to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
        csum_bytes = BTRFS_I(inode)->csum_bytes;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                goto out_fail;
        }
 
-       ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+       ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
        if (unlikely(ret)) {
                btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
                goto out_fail;
        }
 
        spin_lock(&BTRFS_I(inode)->lock);
-       if (extra_reserve) {
-               set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                       &BTRFS_I(inode)->runtime_flags);
-               nr_extents--;
+       if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
+                            &BTRFS_I(inode)->runtime_flags)) {
+               to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
+               release_extra = true;
        }
        BTRFS_I(inode)->reserved_extents += nr_extents;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        if (to_reserve)
                trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                              btrfs_ino(inode), to_reserve, 1);
-       block_rsv_add_bytes(block_rsv, to_reserve, 1);
-
+       if (release_extra)
+               btrfs_block_rsv_release(root, block_rsv,
+                               btrfs_calc_trans_metadata_size(root,
+                                                              1));
        return 0;
 
 out_fail:
@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
 
+                       trace_btrfs_space_reservation(root->fs_info, "pinned",
+                                                     cache->space_info->flags,
+                                                     num_bytes, 1);
                        set_extent_dirty(info->pinned_extents,
                                         bytenr, bytenr + num_bytes - 1,
                                         GFP_NOFS | __GFP_NOFAIL);
@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root,
        spin_unlock(&cache->lock);
        spin_unlock(&cache->space_info->lock);
 
+       trace_btrfs_space_reservation(root->fs_info, "pinned",
+                                     cache->space_info->flags, num_bytes, 1);
        set_extent_dirty(root->fs_info->pinned_extents, bytenr,
                         bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
-       if (reserved)
-               trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
        return 0;
 }
 
@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                spin_lock(&cache->lock);
                cache->pinned -= len;
                space_info->bytes_pinned -= len;
+
+               trace_btrfs_space_reservation(fs_info, "pinned",
+                                             space_info->flags, len, 0);
                space_info->max_extent_size = 0;
                percpu_counter_add(&space_info->total_bytes_pinned, -len);
                if (cache->ro) {
@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                        readonly = true;
                }
                spin_unlock(&cache->lock);
-               if (!readonly && global_rsv->space_info == space_info) {
+               if (!readonly && return_free_space &&
+                   global_rsv->space_info == space_info) {
+                       u64 to_add = len;
+                       WARN_ON(!return_free_space);
                        spin_lock(&global_rsv->lock);
                        if (!global_rsv->full) {
-                               len = min(len, global_rsv->size -
-                                         global_rsv->reserved);
-                               global_rsv->reserved += len;
-                               space_info->bytes_may_use += len;
+                               to_add = min(len, global_rsv->size -
+                                            global_rsv->reserved);
+                               global_rsv->reserved += to_add;
+                               space_info->bytes_may_use += to_add;
                                if (global_rsv->reserved >= global_rsv->size)
                                        global_rsv->full = 1;
+                               trace_btrfs_space_reservation(fs_info,
+                                                             "space_info",
+                                                             space_info->flags,
+                                                             to_add, 1);
+                               len -= to_add;
                        }
                        spin_unlock(&global_rsv->lock);
+                       /* Add to any tickets we may have */
+                       if (len)
+                               space_info_add_new_bytes(fs_info, space_info,
+                                                        len);
                }
                spin_unlock(&space_info->lock);
        }
@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
                btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+               trace_btrfs_reserved_extent_free(root, start, len);
        }
 
        btrfs_put_block_group(cache);
-
-       trace_btrfs_reserved_extent_free(root, start, len);
-
        return ret;
 }
 
@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                space_info = list_entry(info->space_info.next,
                                        struct btrfs_space_info,
                                        list);
-               if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
-                       if (WARN_ON(space_info->bytes_pinned > 0 ||
+
+               /*
+                * Do not hide this behind enospc_debug, this is actually
+                * important and indicates a real bug if this happens.
+                */
+               if (WARN_ON(space_info->bytes_pinned > 0 ||
                            space_info->bytes_reserved > 0 ||
-                           space_info->bytes_may_use > 0)) {
-                               dump_space_info(space_info, 0, 0);
-                       }
-               }
+                           space_info->bytes_may_use > 0))
+                       dump_space_info(space_info, 0, 0);
                list_del(&space_info->list);
                for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
                        struct kobject *kobj;
@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        goto error;
                }
 
+               trace_btrfs_add_block_group(root->fs_info, cache, 0);
                ret = update_space_info(info, cache->flags, found_key.offset,
                                        btrfs_block_group_used(&cache->item),
-                                       &space_info);
+                                       cache->bytes_super, &space_info);
                if (ret) {
                        btrfs_remove_free_space_cache(cache);
                        spin_lock(&info->block_group_cache_lock);
@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                }
 
                cache->space_info = space_info;
-               spin_lock(&cache->space_info->lock);
-               cache->space_info->bytes_readonly += cache->bytes_super;
-               spin_unlock(&cache->space_info->lock);
 
                __link_block_group(space_info, cache);
 
@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_root *extent_root;
        struct btrfs_block_group_cache *cache;
-
        extent_root = root->fs_info->extent_root;
 
        btrfs_set_log_full_commit(root->fs_info, trans);
@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
         * assigned to our block group, but don't update its counters just yet.
         * We want our bg to be added to the rbtree with its ->space_info set.
         */
-       ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+       ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
                                &cache->space_info);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
         * Now that our block group has its ->space_info set and is inserted in
         * the rbtree, update the space info's counters.
         */
+       trace_btrfs_add_block_group(root->fs_info, cache, 1);
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
-                               &cache->space_info);
+                               cache->bytes_super, &cache->space_info);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
                spin_lock(&root->fs_info->block_group_cache_lock);
@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        }
        update_global_block_rsv(root->fs_info);
 
-       spin_lock(&cache->space_info->lock);
-       cache->space_info->bytes_readonly += cache->bytes_super;
-       spin_unlock(&cache->space_info->lock);
-
        __link_block_group(cache->space_info, cache);
 
        list_add_tail(&cache->bg_list, &trans->new_bgs);
 
        set_avail_alloc_bits(extent_root->fs_info, type);
-
        return 0;
 }
 
@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
                mixed = 1;
 
        flags = BTRFS_BLOCK_GROUP_SYSTEM;
-       ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+       ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        if (ret)
                goto out;
 
        if (mixed) {
                flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        } else {
                flags = BTRFS_BLOCK_GROUP_METADATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
                if (ret)
                        goto out;
 
                flags = BTRFS_BLOCK_GROUP_DATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        }
 out:
        return ret;