@@ -2374,7 +2374,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
 	struct btrfs_delayed_ref_node *ref;
 
-	if (RB_EMPTY_ROOT(&head->ref_tree))
+	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
 		return NULL;
 
 	/*
@@ -2387,7 +2387,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 		return list_first_entry(&head->ref_add_list,
 				struct btrfs_delayed_ref_node, add_list);
 
-	ref = rb_entry(rb_first(&head->ref_tree),
+	ref = rb_entry(rb_first_cached(&head->ref_tree),
 		       struct btrfs_delayed_ref_node, ref_node);
 	ASSERT(list_empty(&ref->add_list));
 	return ref;
@@ -2448,13 +2448,13 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 	spin_unlock(&head->lock);
 	spin_lock(&delayed_refs->lock);
 	spin_lock(&head->lock);
-	if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
+	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
		spin_unlock(&head->lock);
 		spin_unlock(&delayed_refs->lock);
 		return 1;
 	}
 	delayed_refs->num_heads--;
-	rb_erase(&head->href_node, &delayed_refs->href_root);
+	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 	RB_CLEAR_NODE(&head->href_node);
 	spin_unlock(&head->lock);
 	spin_unlock(&delayed_refs->lock);
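
The three hunks above are part of the conversion of the delayed-ref trees from `struct rb_root` to `struct rb_root_cached`, which stores a pointer to the leftmost node alongside the root so that `rb_first_cached()` is a constant-time pointer read instead of the logarithmic descent `rb_first()` performs; `rb_erase_cached()` keeps that pointer valid across deletions. The standalone sketch below models only the cached-minimum idea with a toy unbalanced BST (the struct and names are invented for illustration, not the kernel's rbtree API):

```c
#include <stdio.h>
#include <stdlib.h>

/* Toy BST with a cached leftmost node, modeling rb_root_cached. */
struct node { long key; struct node *left, *right; };
struct root_cached { struct node *root; struct node *leftmost; };

static void insert(struct root_cached *t, long key)
{
	struct node *n = calloc(1, sizeof(*n));
	struct node **link = &t->root;
	int is_leftmost = 1;

	n->key = key;
	while (*link) {
		if (key < (*link)->key) {
			link = &(*link)->left;
		} else {
			link = &(*link)->right;
			is_leftmost = 0;	/* went right at least once */
		}
	}
	*link = n;
	if (is_leftmost)		/* new minimum: update the cache */
		t->leftmost = n;
}

/* The point of the cache: first() is a pointer read, not a descent. */
static struct node *first_cached(struct root_cached *t)
{
	return t->leftmost;
}

int main(void)
{
	struct root_cached t = { NULL, NULL };

	insert(&t, 30); insert(&t, 10); insert(&t, 20);
	printf("min = %ld\n", first_cached(&t)->key);	/* prints 10 */
	return 0;
}
```

This pays off here because delayed-ref processing repeatedly consumes the smallest entry, so every iteration hits the `rb_first()` path.
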
@@ -2502,102 +2502,66 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-/*
- * Returns 0 on success or if called with an already aborted transaction.
- * Returns -ENOMEM or -EIO on failure and will abort the transaction.
- */
-static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-					     unsigned long nr)
+static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
+					struct btrfs_trans_handle *trans)
+{
+	struct btrfs_delayed_ref_root *delayed_refs =
+		&trans->transaction->delayed_refs;
+	struct btrfs_delayed_ref_head *head = NULL;
+	int ret;
+
+	spin_lock(&delayed_refs->lock);
+	head = btrfs_select_ref_head(delayed_refs);
+	if (!head) {
+		spin_unlock(&delayed_refs->lock);
+		return head;
+	}
+
+	/*
+	 * Grab the lock that says we are going to process all the refs for
+	 * this head
+	 */
+	ret = btrfs_delayed_ref_lock(delayed_refs, head);
+	spin_unlock(&delayed_refs->lock);
+
+	/*
+	 * We may have dropped the spin lock to get the head mutex lock, and
+	 * that might have given someone else time to free the head. If that's
+	 * true, it has been removed from our list and we can move on.
+	 */
+	if (ret == -EAGAIN)
+		head = ERR_PTR(-EAGAIN);
+
+	return head;
+}
+
+static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
+					   struct btrfs_delayed_ref_head *locked_ref,
+					   unsigned long *run_refs)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
-	struct btrfs_delayed_ref_node *ref;
-	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	struct btrfs_delayed_extent_op *extent_op;
-	ktime_t start = ktime_get();
-	int ret;
-	unsigned long count = 0;
-	unsigned long actual_count = 0;
+	struct btrfs_delayed_ref_node *ref;
 	int must_insert_reserved = 0;
+	int ret;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	while (1) {
-		if (!locked_ref) {
-			if (count >= nr)
-				break;
-
-			spin_lock(&delayed_refs->lock);
-			locked_ref = btrfs_select_ref_head(trans);
-			if (!locked_ref) {
-				spin_unlock(&delayed_refs->lock);
-				break;
-			}
+	lockdep_assert_held(&locked_ref->mutex);
+	lockdep_assert_held(&locked_ref->lock);
 
-			/* grab the lock that says we are going to process
-			 * all the refs for this head */
-			ret = btrfs_delayed_ref_lock(trans, locked_ref);
-			spin_unlock(&delayed_refs->lock);
-			/*
-			 * we may have dropped the spin lock to get the head
-			 * mutex lock, and that might have given someone else
-			 * time to free the head. If that's true, it has been
-			 * removed from our list and we can move on.
-			 */
-			if (ret == -EAGAIN) {
-				locked_ref = NULL;
-				count++;
-				continue;
-			}
-		}
-
-		/*
-		 * We need to try and merge add/drops of the same ref since we
-		 * can run into issues with relocate dropping the implicit ref
-		 * and then it being added back again before the drop can
-		 * finish. If we merged anything we need to re-loop so we can
-		 * get a good ref.
-		 * Or we can get node references of the same type that weren't
-		 * merged when created due to bumps in the tree mod seq, and
-		 * we need to merge them to prevent adding an inline extent
-		 * backref before dropping it (triggering a BUG_ON at
-		 * insert_inline_extent_backref()).
-		 */
-		spin_lock(&locked_ref->lock);
-		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
-
-		ref = select_delayed_ref(locked_ref);
-
-		if (ref && ref->seq &&
+	while ((ref = select_delayed_ref(locked_ref))) {
+		if (ref->seq &&
 		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
 			spin_unlock(&locked_ref->lock);
 			unselect_delayed_ref_head(delayed_refs, locked_ref);
-			locked_ref = NULL;
-			cond_resched();
-			count++;
-			continue;
-		}
-
-		/*
-		 * We're done processing refs in this ref_head, clean everything
-		 * up and move on to the next ref_head.
-		 */
-		if (!ref) {
-			ret = cleanup_ref_head(trans, locked_ref);
-			if (ret > 0 ) {
-				/* We dropped our lock, we need to loop. */
-				ret = 0;
-				continue;
-			} else if (ret) {
-				return ret;
-			}
-			locked_ref = NULL;
-			count++;
-			continue;
+			return -EAGAIN;
 		}
 
-		actual_count++;
+		(*run_refs)++;
 		ref->in_tree = 0;
-		rb_erase(&ref->ref_node, &locked_ref->ref_tree);
+		rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
 		RB_CLEAR_NODE(&ref->ref_node);
 		if (!list_empty(&ref->add_list))
 			list_del(&ref->add_list);
@@ -2619,8 +2583,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		atomic_dec(&delayed_refs->num_entries);
 
 		/*
-		 * Record the must-insert_reserved flag before we drop the spin
-		 * lock.
+		 * Record the must_insert_reserved flag before we drop the
+		 * spin lock.
 		 */
 		must_insert_reserved = locked_ref->must_insert_reserved;
 		locked_ref->must_insert_reserved = 0;
@@ -2642,10 +2606,90 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		}
 
 		btrfs_put_delayed_ref(ref);
-		count++;
 		cond_resched();
+
+		spin_lock(&locked_ref->lock);
+		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
 	}
 
+	return 0;
+}
+
+/*
+ * Returns 0 on success or if called with an already aborted transaction.
+ * Returns -ENOMEM or -EIO on failure and will abort the transaction.
+ */
+static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+					     unsigned long nr)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_head *locked_ref = NULL;
+	ktime_t start = ktime_get();
+	int ret;
+	unsigned long count = 0;
+	unsigned long actual_count = 0;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	do {
+		if (!locked_ref) {
+			locked_ref = btrfs_obtain_ref_head(trans);
+			if (IS_ERR_OR_NULL(locked_ref)) {
+				if (PTR_ERR(locked_ref) == -EAGAIN) {
+					continue;
+				} else {
+					break;
+				}
+			}
+			count++;
+		}
+		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish. If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 * Or we can get node references of the same type that weren't
+		 * merged when created due to bumps in the tree mod seq, and
+		 * we need to merge them to prevent adding an inline extent
+		 * backref before dropping it (triggering a BUG_ON at
+		 * insert_inline_extent_backref()).
+		 */
+		spin_lock(&locked_ref->lock);
+		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
+
+		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
+						      &actual_count);
+		if (ret < 0 && ret != -EAGAIN) {
+			/*
+			 * Error, btrfs_run_delayed_refs_for_head already
+			 * unlocked everything so just bail out
+			 */
+			return ret;
+		} else if (!ret) {
+			/*
+			 * Success, perform the usual cleanup of a processed
+			 * head
+			 */
+			ret = cleanup_ref_head(trans, locked_ref);
+			if (ret > 0 ) {
+				/* We dropped our lock, we need to loop. */
+				ret = 0;
+				continue;
+			} else if (ret) {
+				return ret;
+			}
+		}
+
+		/*
+		 * Either success case or btrfs_run_delayed_refs_for_head
+		 * returned -EAGAIN, meaning we need to select another head
+		 */
+
+		locked_ref = NULL;
+		cond_resched();
+	} while ((nr != -1 && count < nr) || locked_ref);
+
 	/*
 	 * We don't want to include ref heads since we can have empty ref heads
 	 * and those will drastically skew our runtime down since we just do
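
In the refactored loop, `btrfs_obtain_ref_head()` folds three outcomes into one pointer: a valid head, NULL ("no work left"), or `ERR_PTR(-EAGAIN)` ("the head vanished while we dropped the spinlock, pick again"), which the caller untangles with `IS_ERR_OR_NULL()` and `PTR_ERR()`. The real macros live in the kernel's `<linux/err.h>`; the sketch below reimplements just enough of them in userspace to show the encoding trick (the `obtain()` stand-in and its race are invented for the example):

```c
#include <stdio.h>

#define EAGAIN		11
#define MAX_ERRNO	4095

/* Errno values are stuffed into the top 4095 addresses, a range no
 * valid kernel object can occupy, so one pointer carries both cases. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}
static inline int IS_ERR_OR_NULL(const void *p) { return !p || IS_ERR(p); }

static int head_count = 2;

/* Toy stand-in for btrfs_obtain_ref_head(): the first call "loses the
 * race" and asks for a retry, later calls return work, then NULL. */
static void *obtain(void)
{
	static int raced = 1;

	if (raced) {
		raced = 0;
		return ERR_PTR(-EAGAIN);	/* transient failure */
	}
	if (head_count)
		return &head_count;		/* some work to do */
	return NULL;				/* queue drained */
}

int main(void)
{
	void *head;

	while (1) {
		head = obtain();
		if (IS_ERR_OR_NULL(head)) {
			if (PTR_ERR(head) == -EAGAIN)
				continue;	/* transient: pick again */
			break;			/* NULL: no more work */
		}
		printf("processing a head\n");
		head_count--;
	}
	return 0;
}
```
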
@@ -2745,9 +2789,9 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
 	return num_csums;
 }
 
-int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info)
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_rsv *global_rsv;
 	u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
 	u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
@@ -2782,8 +2826,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info)
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
 {
 	u64 num_entries =
 		atomic_read(&trans->transaction->delayed_refs.num_entries);
@@ -2791,14 +2834,14 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
 	u64 val;
 
 	smp_mb();
-	avg_runtime = fs_info->avg_delayed_ref_runtime;
+	avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
 	val = num_entries * avg_runtime;
 	if (val >= NSEC_PER_SEC)
 		return 1;
 	if (val >= NSEC_PER_SEC / 2)
 		return 2;
 
-	return btrfs_check_space_for_delayed_refs(trans, fs_info);
+	return btrfs_check_space_for_delayed_refs(trans);
 }
 
 struct async_delayed_refs {
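
The throttling heuristic in the hunk above is plain arithmetic: the estimated time to flush the backlog is `num_entries * avg_runtime` nanoseconds, with hard throttling at a second of estimated work and a softer level at half a second. For instance, at a hypothetical average of 50,000 ns per ref, 20,000 queued entries estimate to exactly `NSEC_PER_SEC` and trip the first threshold. A tiny check of that arithmetic (the sample numbers are illustrative, not measured):

```c
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Mirrors the threshold logic of btrfs_should_throttle_delayed_refs(),
 * minus the space check it falls through to when below both levels. */
static int throttle_level(uint64_t num_entries, uint64_t avg_runtime_ns)
{
	uint64_t val = num_entries * avg_runtime_ns;

	if (val >= NSEC_PER_SEC)
		return 1;	/* a second of backlog: throttle hard */
	if (val >= NSEC_PER_SEC / 2)
		return 2;	/* half a second: throttle lightly */
	return 0;
}

int main(void)
{
	printf("%d\n", throttle_level(20000, 50000));	/* 1: 1.0e9 ns */
	printf("%d\n", throttle_level(11000, 50000));	/* 2: 5.5e8 ns */
	printf("%d\n", throttle_level(1000, 50000));	/* 0: 5.0e7 ns */
	return 0;
}
```
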
@@ -2940,7 +2983,7 @@ again:
 	btrfs_create_pending_block_groups(trans);
 
 	spin_lock(&delayed_refs->lock);
-	node = rb_first(&delayed_refs->href_root);
+	node = rb_first_cached(&delayed_refs->href_root);
 	if (!node) {
 		spin_unlock(&delayed_refs->lock);
 		goto out;
@@ -3040,7 +3083,8 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 	 * XXX: We should replace this with a proper search function in the
 	 * future.
 	 */
-	for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+	for (node = rb_first_cached(&head->ref_tree); node;
+	     node = rb_next(node)) {
 		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 		/* If it's a shared ref we know a cross reference exists */
 		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
@@ -3139,7 +3183,6 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
 {
 	struct btrfs_path *path;
 	int ret;
-	int ret2;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -3151,17 +3194,9 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
 		if (ret && ret != -ENOENT)
 			goto out;
 
-		ret2 = check_delayed_ref(root, path, objectid,
-					 offset, bytenr);
-	} while (ret2 == -EAGAIN);
-
-	if (ret2 && ret2 != -ENOENT) {
-		ret = ret2;
-		goto out;
-	}
+		ret = check_delayed_ref(root, path, objectid, offset, bytenr);
+	} while (ret == -EAGAIN);
 
-	if (ret != -ENOENT || ret2 != -ENOENT)
-		ret = 0;
 out:
 	btrfs_free_path(path);
 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
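
After this simplification, `btrfs_cross_ref_exist()` reuses a single `ret` for both legs of the loop: the commit-root check runs first, then `check_delayed_ref()` may report `-EAGAIN` when the delayed-ref head was contended, sending the whole `do { } while` around again. A hedged sketch of the same retry shape (the stand-in function and its failure schedule are invented for illustration):

```c
#include <stdio.h>
#include <errno.h>

static int attempts_left = 2;

/* Toy stand-in for check_delayed_ref(): fails with -EAGAIN until a
 * (pretend) contended head lock becomes available. */
static int check_delayed(void)
{
	if (attempts_left-- > 0)
		return -EAGAIN;
	return -ENOENT;		/* no delayed ref found */
}

int main(void)
{
	int ret;

	do {
		/* the commit-root check would rerun here each iteration */
		ret = check_delayed();
	} while (ret == -EAGAIN);

	printf("final ret = %d\n", ret);	/* -ENOENT after 2 retries */
	return 0;
}
```
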
@@ -5284,7 +5319,7 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
 }
 
 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int update_size)
+				u64 num_bytes, bool update_size)
 {
 	spin_lock(&block_rsv->lock);
 	block_rsv->reserved += num_bytes;
@@ -5316,7 +5351,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
 		global_rsv->full = 0;
 	spin_unlock(&global_rsv->lock);
 
-	block_rsv_add_bytes(dest, num_bytes, 1);
+	block_rsv_add_bytes(dest, num_bytes, true);
 	return 0;
 }
@@ -5479,7 +5514,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
 			    struct btrfs_block_rsv *dst, u64 num_bytes,
-			    int update_size)
+			    bool update_size)
 {
 	int ret;
 
@@ -5539,10 +5574,8 @@ int btrfs_block_rsv_add(struct btrfs_root *root,
 		return 0;
 
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
-	if (!ret) {
-		block_rsv_add_bytes(block_rsv, num_bytes, 1);
-		return 0;
-	}
+	if (!ret)
+		block_rsv_add_bytes(block_rsv, num_bytes, true);
 
 	return ret;
 }
@@ -5587,7 +5620,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
 
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
-		block_rsv_add_bytes(block_rsv, num_bytes, 0);
+		block_rsv_add_bytes(block_rsv, num_bytes, false);
 		return 0;
 	}
 
@@ -5629,7 +5662,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 		return ret;
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
-		block_rsv_add_bytes(block_rsv, num_bytes, 0);
+		block_rsv_add_bytes(block_rsv, num_bytes, false);
 		trace_btrfs_space_reservation(root->fs_info, "delalloc",
 					      btrfs_ino(inode), num_bytes, 1);
 
@@ -5835,7 +5868,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     BTRFS_RESERVE_FLUSH_ALL);
 
 	if (ret == -ENOSPC && use_global_rsv)
-		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
+		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
 
 	if (ret && qgroup_num_bytes)
 		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
@@ -6399,10 +6432,6 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
 	} else {
 		cache->reserved += num_bytes;
 		space_info->bytes_reserved += num_bytes;
-
-		trace_btrfs_space_reservation(cache->fs_info,
-				"space_info", space_info->flags,
-				ram_bytes, 0);
 		space_info->bytes_may_use -= ram_bytes;
 		if (delalloc)
 			cache->delalloc_bytes += num_bytes;
@@ -6424,11 +6453,10 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
  * reserve set to 0 in order to clear the reservation.
  */
 
-static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
-				     u64 num_bytes, int delalloc)
+static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+				      u64 num_bytes, int delalloc)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
-	int ret = 0;
 
 	spin_lock(&space_info->lock);
 	spin_lock(&cache->lock);
@@ -6441,7 +6469,6 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
 		cache->delalloc_bytes -= num_bytes;
 	spin_unlock(&cache->lock);
 	spin_unlock(&space_info->lock);
-	return ret;
 }
 void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
 {
@@ -6925,7 +6952,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 		goto out_delayed_unlock;
 
 	spin_lock(&head->lock);
-	if (!RB_EMPTY_ROOT(&head->ref_tree))
+	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
 		goto out;
 
 	if (head->extent_op) {
@@ -6946,7 +6973,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 * at this point we have a head with no other entries. Go
 	 * ahead and process it.
 	 */
-	rb_erase(&head->href_node, &delayed_refs->href_root);
+	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 	RB_CLEAR_NODE(&head->href_node);
 	atomic_dec(&delayed_refs->num_entries);
@@ -8119,6 +8146,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	if (IS_ERR(buf))
 		return buf;
 
+	/*
+	 * Extra safety check in case the extent tree is corrupted and extent
+	 * allocator chooses to use a tree block which is already used and
+	 * locked.
+	 */
+	if (buf->lock_owner == current->pid) {
+		btrfs_err_rl(fs_info,
+"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
+			buf->start, btrfs_header_owner(buf), current->pid);
+		free_extent_buffer(buf);
+		return ERR_PTR(-EUCLEAN);
+	}
+
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(fs_info, buf);
@@ -8215,7 +8255,7 @@ try_reserve:
 static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
 			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
 {
-	block_rsv_add_bytes(block_rsv, blocksize, 0);
+	block_rsv_add_bytes(block_rsv, blocksize, false);
 	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
 }
@@ -8642,7 +8682,13 @@ skip:
 			parent = 0;
 		}
 
-		if (need_account) {
+		/*
+		 * Reloc tree doesn't contribute to qgroup numbers, and we have
+		 * already accounted them at merge time (replace_path),
+		 * thus we could skip expensive subtree trace here.
+		 */
+		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+		    need_account) {
 			ret = btrfs_qgroup_trace_subtree(trans, next,
 							 generation, level - 1);
 			if (ret) {
@@ -8763,15 +8809,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		if (eb == root->node) {
 			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
 				parent = eb->start;
-			else
-				BUG_ON(root->root_key.objectid !=
-				       btrfs_header_owner(eb));
+			else if (root->root_key.objectid != btrfs_header_owner(eb))
+				goto owner_mismatch;
 		} else {
 			if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
 				parent = path->nodes[level + 1]->start;
-			else
-				BUG_ON(root->root_key.objectid !=
-				       btrfs_header_owner(path->nodes[level + 1]));
+			else if (root->root_key.objectid !=
+				 btrfs_header_owner(path->nodes[level + 1]))
+				goto owner_mismatch;
 		}
 
 	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
@@ -8779,6 +8824,11 @@ out:
 	wc->refs[level] = 0;
 	wc->flags[level] = 0;
 	return 0;
+
+owner_mismatch:
+	btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
+		     btrfs_header_owner(eb), root->root_key.objectid);
+	return -EUCLEAN;
 }
 
 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
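
The `walk_up_proc()` hunks above follow a broader btrfs pattern of not crashing the kernel on corrupted metadata: an owner mismatch used to trip `BUG_ON()`, and now logs a rate-limited error and returns `-EUCLEAN` ("structure needs cleaning") so the caller can unwind; the companion hunk in `walk_up_tree()` just below (`if (ret < 0) return ret;`) is what actually propagates the new error upward. A schematic of the before/after shape, with placeholder names:

```c
#include <stdio.h>

#define EUCLEAN 117	/* "Structure needs cleaning": on-disk corruption */

struct block { unsigned long long owner; };

/* Before: BUG_ON(b->owner != expect) would assert and crash.
 * After: report the inconsistency and let the caller unwind. */
static int check_owner(const struct block *b, unsigned long long expect)
{
	if (b->owner != expect) {
		fprintf(stderr,
			"unexpected tree owner, have %llu expect %llu\n",
			b->owner, expect);
		return -EUCLEAN;
	}
	return 0;
}

int main(void)
{
	struct block good = { 5 }, bad = { 7 };

	printf("good: %d\n", check_owner(&good, 5));	/* 0 */
	printf("bad:  %d\n", check_owner(&bad, 5));	/* -117 */
	return 0;
}
```
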
@@ -8832,6 +8882,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 			ret = walk_up_proc(trans, root, path, wc);
 			if (ret > 0)
 				return 0;
+			if (ret < 0)
+				return ret;
 
 			if (path->locks[level]) {
 				btrfs_tree_unlock_rw(path->nodes[level],
@@ -8875,7 +8927,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	int level;
 	bool root_dropped = false;
 
-	btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
+	btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
 
 	path = btrfs_alloc_path();
 	if (!path) {
@@ -9613,6 +9665,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
 
 		block_group = btrfs_lookup_first_block_group(info, last);
 		while (block_group) {
+			wait_block_group_cache_done(block_group);
 			spin_lock(&block_group->lock);
 			if (block_group->iref)
 				break;
@@ -10074,7 +10127,7 @@ error:
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_block_group_cache *block_group, *tmp;
+	struct btrfs_block_group_cache *block_group;
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_block_group_item item;
 	struct btrfs_key key;
@@ -10082,7 +10135,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
 
 	trans->can_flush_pending_bgs = false;
-	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+	while (!list_empty(&trans->new_bgs)) {
+		block_group = list_first_entry(&trans->new_bgs,
+					       struct btrfs_block_group_cache,
+					       bg_list);
 		if (ret)
 			goto next;
@@ -10753,14 +10809,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
  * We don't want a transaction for this since the discard may take a
  * substantial amount of time. We don't require that a transaction be
  * running, but we do need to take a running transaction into account
- * to ensure that we're not discarding chunks that were released in
- * the current transaction.
+ * to ensure that we're not discarding chunks that were released or
+ * allocated in the current transaction.
  *
  * Holding the chunks lock will prevent other threads from allocating
  * or releasing chunks, but it won't prevent a running transaction
 * from committing and releasing the memory that the pending chunks
 * list head uses. For that, we need to take a reference to the
- * transaction.
+ * transaction and hold the commit root sem. We only need to hold
+ * it while performing the free space search since we have already
+ * held back allocations.
 */
 static int btrfs_trim_free_extents(struct btrfs_device *device,
 				   u64 minlen, u64 *trimmed)
@@ -10770,6 +10828,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 
 	*trimmed = 0;
 
+	/* Discard not supported = nothing to do. */
+	if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+		return 0;
+
 	/* Not writeable = nothing to do. */
 	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
 		return 0;
@@ -10787,9 +10849,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 
 		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
 		if (ret)
-			return ret;
+			break;
 
-		down_read(&fs_info->commit_root_sem);
+		ret = down_read_killable(&fs_info->commit_root_sem);
+		if (ret) {
+			mutex_unlock(&fs_info->chunk_mutex);
+			break;
+		}
 
 		spin_lock(&fs_info->trans_lock);
 		trans = fs_info->running_transaction;
@@ -10797,13 +10863,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 			refcount_inc(&trans->use_count);
 		spin_unlock(&fs_info->trans_lock);
 
+		if (!trans)
+			up_read(&fs_info->commit_root_sem);
+
 		ret = find_free_dev_extent_start(trans, device, minlen, start,
 						 &start, &len);
-		if (trans)
+		if (trans) {
+			up_read(&fs_info->commit_root_sem);
 			btrfs_put_transaction(trans);
+		}
 
 		if (ret) {
-			up_read(&fs_info->commit_root_sem);
 			mutex_unlock(&fs_info->chunk_mutex);
 			if (ret == -ENOSPC)
 				ret = 0;
@@ -10811,7 +10881,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 		}
 
 		ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
-		up_read(&fs_info->commit_root_sem);
 		mutex_unlock(&fs_info->chunk_mutex);
 
 		if (ret)
+/*
|
|
|
+ * Trim the whole filesystem by:
|
|
|
+ * 1) trimming the free space in each block group
|
|
|
+ * 2) trimming the unallocated space on each device
|
|
|
+ *
|
|
|
+ * This will also continue trimming even if a block group or device encounters
|
|
|
+ * an error. The return value will be the last error, or 0 if nothing bad
|
|
|
+ * happens.
|
|
|
+ */
|
|
|
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
|
|
|
{
|
|
|
struct btrfs_block_group_cache *cache = NULL;
|
|
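
As the new comment says, the rewritten `btrfs_trim_fs()` in the hunks below switches from fail-fast to best-effort: each block-group or device failure bumps a counter and records the error, the loop `continue`s, and only the last error is surfaced at the end. A compact userspace model of that aggregation policy (the item list and errors are invented for the example):

```c
#include <stdio.h>

/* Pretend per-item trim work: items 1 and 3 fail with distinct errors. */
static int trim_one(int i)
{
	if (i == 1) return -5;	/* stands in for -EIO */
	if (i == 3) return -28;	/* stands in for -ENOSPC */
	return 0;
}

int main(void)
{
	unsigned long long failed = 0;
	int last_err = 0;
	int ret;

	for (int i = 0; i < 5; i++) {
		ret = trim_one(i);
		if (ret) {
			failed++;	/* remember the failure... */
			last_err = ret;
			continue;	/* ...but keep trimming the rest */
		}
	}
	if (failed)
		fprintf(stderr,
			"failed to trim %llu item(s), last error %d\n",
			failed, last_err);
	return last_err ? 1 : 0;	/* last error wins, 0 if none */
}
```
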
@@ -10840,18 +10918,14 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 	u64 start;
 	u64 end;
 	u64 trimmed = 0;
-	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+	u64 bg_failed = 0;
+	u64 dev_failed = 0;
+	int bg_ret = 0;
+	int dev_ret = 0;
 	int ret = 0;
 
-	/*
-	 * try to trim all FS space, our block group may start from non-zero.
-	 */
-	if (range->len == total_bytes)
-		cache = btrfs_lookup_first_block_group(fs_info, range->start);
-	else
-		cache = btrfs_lookup_block_group(fs_info, range->start);
-
-	while (cache) {
+	cache = btrfs_lookup_first_block_group(fs_info, range->start);
+	for (; cache; cache = next_block_group(fs_info, cache)) {
 		if (cache->key.objectid >= (range->start + range->len)) {
 			btrfs_put_block_group(cache);
 			break;
@@ -10865,13 +10939,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 			if (!block_group_cache_done(cache)) {
 				ret = cache_block_group(cache, 0);
 				if (ret) {
-					btrfs_put_block_group(cache);
-					break;
+					bg_failed++;
+					bg_ret = ret;
+					continue;
 				}
 				ret = wait_block_group_cache_done(cache);
 				if (ret) {
-					btrfs_put_block_group(cache);
-					break;
+					bg_failed++;
+					bg_ret = ret;
+					continue;
 				}
 			}
 			ret = btrfs_trim_block_group(cache,
@@ -10882,28 +10958,40 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 
 			trimmed += group_trimmed;
 			if (ret) {
-				btrfs_put_block_group(cache);
-				break;
+				bg_failed++;
+				bg_ret = ret;
+				continue;
 			}
 		}
-
-		cache = next_block_group(fs_info, cache);
 	}
 
+	if (bg_failed)
+		btrfs_warn(fs_info,
+			"failed to trim %llu block group(s), last error %d",
+			bg_failed, bg_ret);
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	devices = &fs_info->fs_devices->alloc_list;
-	list_for_each_entry(device, devices, dev_alloc_list) {
+	devices = &fs_info->fs_devices->devices;
+	list_for_each_entry(device, devices, dev_list) {
 		ret = btrfs_trim_free_extents(device, range->minlen,
 					      &group_trimmed);
-		if (ret)
+		if (ret) {
+			dev_failed++;
+			dev_ret = ret;
 			break;
+		}
 
 		trimmed += group_trimmed;
 	}
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
+	if (dev_failed)
+		btrfs_warn(fs_info,
+			"failed to trim %llu device(s), last error %d",
+			dev_failed, dev_ret);
 	range->len = trimmed;
-	return ret;
+	if (bg_ret)
+		return bg_ret;
+	return dev_ret;
 }
 
 /*