|
@@ -26,6 +26,7 @@
|
|
|
#include <linux/slab.h>
|
|
|
#include <linux/ratelimit.h>
|
|
|
#include <linux/percpu_counter.h>
|
|
|
+#include <linux/lockdep.h>
|
|
|
#include "hash.h"
|
|
|
#include "tree-log.h"
|
|
|
#include "disk-io.h"
|
|
@@ -38,6 +39,7 @@
|
|
|
#include "math.h"
|
|
|
#include "sysfs.h"
|
|
|
#include "qgroup.h"
|
|
|
+#include "ref-verify.h"
|
|
|
|
|
|
#undef SCRAMBLE_DELAYED_REFS
|
|
|
|
|
@@ -61,9 +63,6 @@ enum {
|
|
|
CHUNK_ALLOC_FORCE = 2,
|
|
|
};
|
|
|
|
|
|
-static int update_block_group(struct btrfs_trans_handle *trans,
|
|
|
- struct btrfs_fs_info *fs_info, u64 bytenr,
|
|
|
- u64 num_bytes, int alloc);
|
|
|
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_fs_info *fs_info,
|
|
|
struct btrfs_delayed_ref_node *node, u64 parent,
|
|
@@ -91,17 +90,8 @@ static int find_next_key(struct btrfs_path *path, int level,
|
|
|
static void dump_space_info(struct btrfs_fs_info *fs_info,
|
|
|
struct btrfs_space_info *info, u64 bytes,
|
|
|
int dump_block_groups);
|
|
|
-static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
|
|
|
- u64 ram_bytes, u64 num_bytes, int delalloc);
|
|
|
-static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
|
|
|
- u64 num_bytes, int delalloc);
|
|
|
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
|
|
|
u64 num_bytes);
|
|
|
-static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
|
|
- struct btrfs_space_info *space_info,
|
|
|
- u64 orig_bytes,
|
|
|
- enum btrfs_reserve_flush_enum flush,
|
|
|
- bool system_chunk);
|
|
|
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
|
|
|
struct btrfs_space_info *space_info,
|
|
|
u64 num_bytes);
|
|
@@ -652,7 +642,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
|
|
|
cache->cached = BTRFS_CACHE_FAST;
|
|
|
spin_unlock(&cache->lock);
|
|
|
|
|
|
- if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
|
|
|
+ if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
|
|
|
mutex_lock(&caching_ctl->mutex);
|
|
|
ret = load_free_space_cache(fs_info, cache);
|
|
|
|
|
@@ -923,7 +913,7 @@ search_again:
|
|
|
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
|
|
|
if (head) {
|
|
|
if (!mutex_trylock(&head->mutex)) {
|
|
|
- refcount_inc(&head->node.refs);
|
|
|
+ refcount_inc(&head->refs);
|
|
|
spin_unlock(&delayed_refs->lock);
|
|
|
|
|
|
btrfs_release_path(path);
|
|
@@ -934,7 +924,7 @@ search_again:
|
|
|
*/
|
|
|
mutex_lock(&head->mutex);
|
|
|
mutex_unlock(&head->mutex);
|
|
|
- btrfs_put_delayed_ref(&head->node);
|
|
|
+ btrfs_put_delayed_ref_head(head);
|
|
|
goto search_again;
|
|
|
}
|
|
|
spin_lock(&head->lock);
|
|
@@ -943,7 +933,7 @@ search_again:
|
|
|
else
|
|
|
BUG_ON(num_refs == 0);
|
|
|
|
|
|
- num_refs += head->node.ref_mod;
|
|
|
+ num_refs += head->ref_mod;
|
|
|
spin_unlock(&head->lock);
|
|
|
mutex_unlock(&head->mutex);
|
|
|
}
|
|
@@ -2189,16 +2179,20 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|
|
|
|
|
/* Can return -ENOMEM */
|
|
|
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_root *root,
|
|
|
u64 bytenr, u64 num_bytes, u64 parent,
|
|
|
u64 root_objectid, u64 owner, u64 offset)
|
|
|
{
|
|
|
+ struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
int old_ref_mod, new_ref_mod;
|
|
|
int ret;
|
|
|
|
|
|
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
|
|
|
root_objectid == BTRFS_TREE_LOG_OBJECTID);
|
|
|
|
|
|
+ btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
|
|
|
+ owner, offset, BTRFS_ADD_DELAYED_REF);
|
|
|
+
|
|
|
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
|
|
|
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
|
|
|
num_bytes, parent,
|
|
@@ -2344,7 +2338,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
|
|
|
|
|
|
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_fs_info *fs_info,
|
|
|
- struct btrfs_delayed_ref_node *node,
|
|
|
+ struct btrfs_delayed_ref_head *head,
|
|
|
struct btrfs_delayed_extent_op *extent_op)
|
|
|
{
|
|
|
struct btrfs_key key;
|
|
@@ -2366,14 +2360,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
|
|
if (!path)
|
|
|
return -ENOMEM;
|
|
|
|
|
|
- key.objectid = node->bytenr;
|
|
|
+ key.objectid = head->bytenr;
|
|
|
|
|
|
if (metadata) {
|
|
|
key.type = BTRFS_METADATA_ITEM_KEY;
|
|
|
key.offset = extent_op->level;
|
|
|
} else {
|
|
|
key.type = BTRFS_EXTENT_ITEM_KEY;
|
|
|
- key.offset = node->num_bytes;
|
|
|
+ key.offset = head->num_bytes;
|
|
|
}
|
|
|
|
|
|
again:
|
|
@@ -2390,17 +2384,17 @@ again:
|
|
|
path->slots[0]--;
|
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key,
|
|
|
path->slots[0]);
|
|
|
- if (key.objectid == node->bytenr &&
|
|
|
+ if (key.objectid == head->bytenr &&
|
|
|
key.type == BTRFS_EXTENT_ITEM_KEY &&
|
|
|
- key.offset == node->num_bytes)
|
|
|
+ key.offset == head->num_bytes)
|
|
|
ret = 0;
|
|
|
}
|
|
|
if (ret > 0) {
|
|
|
btrfs_release_path(path);
|
|
|
metadata = 0;
|
|
|
|
|
|
- key.objectid = node->bytenr;
|
|
|
- key.offset = node->num_bytes;
|
|
|
+ key.objectid = head->bytenr;
|
|
|
+ key.offset = head->num_bytes;
|
|
|
key.type = BTRFS_EXTENT_ITEM_KEY;
|
|
|
goto again;
|
|
|
}
|
|
@@ -2507,44 +2501,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- if (btrfs_delayed_ref_is_head(node)) {
|
|
|
- struct btrfs_delayed_ref_head *head;
|
|
|
- /*
|
|
|
- * we've hit the end of the chain and we were supposed
|
|
|
- * to insert this extent into the tree. But, it got
|
|
|
- * deleted before we ever needed to insert it, so all
|
|
|
- * we have to do is clean up the accounting
|
|
|
- */
|
|
|
- BUG_ON(extent_op);
|
|
|
- head = btrfs_delayed_node_to_head(node);
|
|
|
- trace_run_delayed_ref_head(fs_info, node, head, node->action);
|
|
|
-
|
|
|
- if (head->total_ref_mod < 0) {
|
|
|
- struct btrfs_block_group_cache *cache;
|
|
|
-
|
|
|
- cache = btrfs_lookup_block_group(fs_info, node->bytenr);
|
|
|
- ASSERT(cache);
|
|
|
- percpu_counter_add(&cache->space_info->total_bytes_pinned,
|
|
|
- -node->num_bytes);
|
|
|
- btrfs_put_block_group(cache);
|
|
|
- }
|
|
|
-
|
|
|
- if (insert_reserved) {
|
|
|
- btrfs_pin_extent(fs_info, node->bytenr,
|
|
|
- node->num_bytes, 1);
|
|
|
- if (head->is_data) {
|
|
|
- ret = btrfs_del_csums(trans, fs_info,
|
|
|
- node->bytenr,
|
|
|
- node->num_bytes);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /* Also free its reserved qgroup space */
|
|
|
- btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
|
|
|
- head->qgroup_reserved);
|
|
|
- return ret;
|
|
|
- }
|
|
|
-
|
|
|
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
|
|
node->type == BTRFS_SHARED_BLOCK_REF_KEY)
|
|
|
ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
|
|
@@ -2563,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
|
|
{
|
|
|
struct btrfs_delayed_ref_node *ref;
|
|
|
|
|
|
- if (list_empty(&head->ref_list))
|
|
|
+ if (RB_EMPTY_ROOT(&head->ref_tree))
|
|
|
return NULL;
|
|
|
|
|
|
/*
|
|
@@ -2576,12 +2532,114 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
|
|
|
return list_first_entry(&head->ref_add_list,
|
|
|
struct btrfs_delayed_ref_node, add_list);
|
|
|
|
|
|
- ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
|
|
|
- list);
|
|
|
+ ref = rb_entry(rb_first(&head->ref_tree),
|
|
|
+ struct btrfs_delayed_ref_node, ref_node);
|
|
|
ASSERT(list_empty(&ref->add_list));
|
|
|
return ref;
|
|
|
}
|
|
|
|
|
|
+static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
|
|
+ struct btrfs_delayed_ref_head *head)
|
|
|
+{
|
|
|
+ spin_lock(&delayed_refs->lock);
|
|
|
+ head->processing = 0;
|
|
|
+ delayed_refs->num_heads_ready++;
|
|
|
+ spin_unlock(&delayed_refs->lock);
|
|
|
+ btrfs_delayed_ref_unlock(head);
|
|
|
+}
|
|
|
+
|
|
|
+static int cleanup_extent_op(struct btrfs_trans_handle *trans,
|
|
|
+ struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_delayed_ref_head *head)
|
|
|
+{
|
|
|
+ struct btrfs_delayed_extent_op *extent_op = head->extent_op;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ if (!extent_op)
|
|
|
+ return 0;
|
|
|
+ head->extent_op = NULL;
|
|
|
+ if (head->must_insert_reserved) {
|
|
|
+ btrfs_free_delayed_extent_op(extent_op);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ spin_unlock(&head->lock);
|
|
|
+ ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
|
|
|
+ btrfs_free_delayed_extent_op(extent_op);
|
|
|
+ return ret ? ret : 1;
|
|
|
+}
|
|
|
+
|
|
|
+static int cleanup_ref_head(struct btrfs_trans_handle *trans,
|
|
|
+ struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_delayed_ref_head *head)
|
|
|
+{
|
|
|
+ struct btrfs_delayed_ref_root *delayed_refs;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ delayed_refs = &trans->transaction->delayed_refs;
|
|
|
+
|
|
|
+ ret = cleanup_extent_op(trans, fs_info, head);
|
|
|
+ if (ret < 0) {
|
|
|
+ unselect_delayed_ref_head(delayed_refs, head);
|
|
|
+ btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
|
|
|
+ return ret;
|
|
|
+ } else if (ret) {
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Need to drop our head ref lock and re-acquire the delayed ref lock
|
|
|
+ * and then re-check to make sure nobody got added.
|
|
|
+ */
|
|
|
+ spin_unlock(&head->lock);
|
|
|
+ spin_lock(&delayed_refs->lock);
|
|
|
+ spin_lock(&head->lock);
|
|
|
+ if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
|
|
|
+ spin_unlock(&head->lock);
|
|
|
+ spin_unlock(&delayed_refs->lock);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ delayed_refs->num_heads--;
|
|
|
+ rb_erase(&head->href_node, &delayed_refs->href_root);
|
|
|
+ RB_CLEAR_NODE(&head->href_node);
|
|
|
+ spin_unlock(&delayed_refs->lock);
|
|
|
+ spin_unlock(&head->lock);
|
|
|
+ atomic_dec(&delayed_refs->num_entries);
|
|
|
+
|
|
|
+ trace_run_delayed_ref_head(fs_info, head, 0);
|
|
|
+
|
|
|
+ if (head->total_ref_mod < 0) {
|
|
|
+ struct btrfs_block_group_cache *cache;
|
|
|
+
|
|
|
+ cache = btrfs_lookup_block_group(fs_info, head->bytenr);
|
|
|
+ ASSERT(cache);
|
|
|
+ percpu_counter_add(&cache->space_info->total_bytes_pinned,
|
|
|
+ -head->num_bytes);
|
|
|
+ btrfs_put_block_group(cache);
|
|
|
+
|
|
|
+ if (head->is_data) {
|
|
|
+ spin_lock(&delayed_refs->lock);
|
|
|
+ delayed_refs->pending_csums -= head->num_bytes;
|
|
|
+ spin_unlock(&delayed_refs->lock);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (head->must_insert_reserved) {
|
|
|
+ btrfs_pin_extent(fs_info, head->bytenr,
|
|
|
+ head->num_bytes, 1);
|
|
|
+ if (head->is_data) {
|
|
|
+ ret = btrfs_del_csums(trans, fs_info, head->bytenr,
|
|
|
+ head->num_bytes);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Also free its reserved qgroup space */
|
|
|
+ btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
|
|
|
+ head->qgroup_reserved);
|
|
|
+ btrfs_delayed_ref_unlock(head);
|
|
|
+ btrfs_put_delayed_ref_head(head);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Returns 0 on success or if called with an already aborted transaction.
|
|
|
* Returns -ENOMEM or -EIO on failure and will abort the transaction.
|
|
@@ -2655,11 +2713,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|
|
if (ref && ref->seq &&
|
|
|
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
|
|
|
spin_unlock(&locked_ref->lock);
|
|
|
- spin_lock(&delayed_refs->lock);
|
|
|
- locked_ref->processing = 0;
|
|
|
- delayed_refs->num_heads_ready++;
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- btrfs_delayed_ref_unlock(locked_ref);
|
|
|
+ unselect_delayed_ref_head(delayed_refs, locked_ref);
|
|
|
locked_ref = NULL;
|
|
|
cond_resched();
|
|
|
count++;
|
|
@@ -2667,102 +2721,55 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * record the must insert reserved flag before we
|
|
|
- * drop the spin lock.
|
|
|
+ * We're done processing refs in this ref_head, clean everything
|
|
|
+ * up and move on to the next ref_head.
|
|
|
*/
|
|
|
- must_insert_reserved = locked_ref->must_insert_reserved;
|
|
|
- locked_ref->must_insert_reserved = 0;
|
|
|
-
|
|
|
- extent_op = locked_ref->extent_op;
|
|
|
- locked_ref->extent_op = NULL;
|
|
|
-
|
|
|
if (!ref) {
|
|
|
-
|
|
|
-
|
|
|
- /* All delayed refs have been processed, Go ahead
|
|
|
- * and send the head node to run_one_delayed_ref,
|
|
|
- * so that any accounting fixes can happen
|
|
|
- */
|
|
|
- ref = &locked_ref->node;
|
|
|
-
|
|
|
- if (extent_op && must_insert_reserved) {
|
|
|
- btrfs_free_delayed_extent_op(extent_op);
|
|
|
- extent_op = NULL;
|
|
|
- }
|
|
|
-
|
|
|
- if (extent_op) {
|
|
|
- spin_unlock(&locked_ref->lock);
|
|
|
- ret = run_delayed_extent_op(trans, fs_info,
|
|
|
- ref, extent_op);
|
|
|
- btrfs_free_delayed_extent_op(extent_op);
|
|
|
-
|
|
|
- if (ret) {
|
|
|
- /*
|
|
|
- * Need to reset must_insert_reserved if
|
|
|
- * there was an error so the abort stuff
|
|
|
- * can cleanup the reserved space
|
|
|
- * properly.
|
|
|
- */
|
|
|
- if (must_insert_reserved)
|
|
|
- locked_ref->must_insert_reserved = 1;
|
|
|
- spin_lock(&delayed_refs->lock);
|
|
|
- locked_ref->processing = 0;
|
|
|
- delayed_refs->num_heads_ready++;
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- btrfs_debug(fs_info,
|
|
|
- "run_delayed_extent_op returned %d",
|
|
|
- ret);
|
|
|
- btrfs_delayed_ref_unlock(locked_ref);
|
|
|
- return ret;
|
|
|
- }
|
|
|
+ ret = cleanup_ref_head(trans, fs_info, locked_ref);
|
|
|
+ if (ret > 0 ) {
|
|
|
+ /* We dropped our lock, we need to loop. */
|
|
|
+ ret = 0;
|
|
|
continue;
|
|
|
+ } else if (ret) {
|
|
|
+ return ret;
|
|
|
}
|
|
|
+ locked_ref = NULL;
|
|
|
+ count++;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
- /*
|
|
|
- * Need to drop our head ref lock and re-acquire the
|
|
|
- * delayed ref lock and then re-check to make sure
|
|
|
- * nobody got added.
|
|
|
- */
|
|
|
- spin_unlock(&locked_ref->lock);
|
|
|
- spin_lock(&delayed_refs->lock);
|
|
|
- spin_lock(&locked_ref->lock);
|
|
|
- if (!list_empty(&locked_ref->ref_list) ||
|
|
|
- locked_ref->extent_op) {
|
|
|
- spin_unlock(&locked_ref->lock);
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- continue;
|
|
|
- }
|
|
|
- ref->in_tree = 0;
|
|
|
- delayed_refs->num_heads--;
|
|
|
- rb_erase(&locked_ref->href_node,
|
|
|
- &delayed_refs->href_root);
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- } else {
|
|
|
- actual_count++;
|
|
|
- ref->in_tree = 0;
|
|
|
- list_del(&ref->list);
|
|
|
- if (!list_empty(&ref->add_list))
|
|
|
- list_del(&ref->add_list);
|
|
|
+ actual_count++;
|
|
|
+ ref->in_tree = 0;
|
|
|
+ rb_erase(&ref->ref_node, &locked_ref->ref_tree);
|
|
|
+ RB_CLEAR_NODE(&ref->ref_node);
|
|
|
+ if (!list_empty(&ref->add_list))
|
|
|
+ list_del(&ref->add_list);
|
|
|
+ /*
|
|
|
+ * When we play the delayed ref, also correct the ref_mod on
|
|
|
+ * head
|
|
|
+ */
|
|
|
+ switch (ref->action) {
|
|
|
+ case BTRFS_ADD_DELAYED_REF:
|
|
|
+ case BTRFS_ADD_DELAYED_EXTENT:
|
|
|
+ locked_ref->ref_mod -= ref->ref_mod;
|
|
|
+ break;
|
|
|
+ case BTRFS_DROP_DELAYED_REF:
|
|
|
+ locked_ref->ref_mod += ref->ref_mod;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ WARN_ON(1);
|
|
|
}
|
|
|
atomic_dec(&delayed_refs->num_entries);
|
|
|
|
|
|
- if (!btrfs_delayed_ref_is_head(ref)) {
|
|
|
- /*
|
|
|
- * when we play the delayed ref, also correct the
|
|
|
- * ref_mod on head
|
|
|
- */
|
|
|
- switch (ref->action) {
|
|
|
- case BTRFS_ADD_DELAYED_REF:
|
|
|
- case BTRFS_ADD_DELAYED_EXTENT:
|
|
|
- locked_ref->node.ref_mod -= ref->ref_mod;
|
|
|
- break;
|
|
|
- case BTRFS_DROP_DELAYED_REF:
|
|
|
- locked_ref->node.ref_mod += ref->ref_mod;
|
|
|
- break;
|
|
|
- default:
|
|
|
- WARN_ON(1);
|
|
|
- }
|
|
|
- }
|
|
|
+ /*
|
|
|
+ * Record the must_insert_reserved flag before we drop the spin
|
|
|
+ * lock.
|
|
|
+ */
|
|
|
+ must_insert_reserved = locked_ref->must_insert_reserved;
|
|
|
+ locked_ref->must_insert_reserved = 0;
|
|
|
+
|
|
|
+ extent_op = locked_ref->extent_op;
|
|
|
+ locked_ref->extent_op = NULL;
|
|
|
spin_unlock(&locked_ref->lock);
|
|
|
|
|
|
ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
|
|
@@ -2770,33 +2777,13 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|
|
|
|
|
btrfs_free_delayed_extent_op(extent_op);
|
|
|
if (ret) {
|
|
|
- spin_lock(&delayed_refs->lock);
|
|
|
- locked_ref->processing = 0;
|
|
|
- delayed_refs->num_heads_ready++;
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- btrfs_delayed_ref_unlock(locked_ref);
|
|
|
+ unselect_delayed_ref_head(delayed_refs, locked_ref);
|
|
|
btrfs_put_delayed_ref(ref);
|
|
|
btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
|
|
|
ret);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- * If this node is a head, that means all the refs in this head
|
|
|
- * have been dealt with, and we will pick the next head to deal
|
|
|
- * with, so we must unlock the head and drop it from the cluster
|
|
|
- * list before we release it.
|
|
|
- */
|
|
|
- if (btrfs_delayed_ref_is_head(ref)) {
|
|
|
- if (locked_ref->is_data &&
|
|
|
- locked_ref->total_ref_mod < 0) {
|
|
|
- spin_lock(&delayed_refs->lock);
|
|
|
- delayed_refs->pending_csums -= ref->num_bytes;
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- }
|
|
|
- btrfs_delayed_ref_unlock(locked_ref);
|
|
|
- locked_ref = NULL;
|
|
|
- }
|
|
|
btrfs_put_delayed_ref(ref);
|
|
|
count++;
|
|
|
cond_resched();
|
|
@@ -3100,33 +3087,16 @@ again:
|
|
|
spin_unlock(&delayed_refs->lock);
|
|
|
goto out;
|
|
|
}
|
|
|
+ head = rb_entry(node, struct btrfs_delayed_ref_head,
|
|
|
+ href_node);
|
|
|
+ refcount_inc(&head->refs);
|
|
|
+ spin_unlock(&delayed_refs->lock);
|
|
|
|
|
|
- while (node) {
|
|
|
- head = rb_entry(node, struct btrfs_delayed_ref_head,
|
|
|
- href_node);
|
|
|
- if (btrfs_delayed_ref_is_head(&head->node)) {
|
|
|
- struct btrfs_delayed_ref_node *ref;
|
|
|
-
|
|
|
- ref = &head->node;
|
|
|
- refcount_inc(&ref->refs);
|
|
|
-
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
- /*
|
|
|
- * Mutex was contended, block until it's
|
|
|
- * released and try again
|
|
|
- */
|
|
|
- mutex_lock(&head->mutex);
|
|
|
- mutex_unlock(&head->mutex);
|
|
|
+ /* Mutex was contended, block until it's released and retry. */
|
|
|
+ mutex_lock(&head->mutex);
|
|
|
+ mutex_unlock(&head->mutex);
|
|
|
|
|
|
- btrfs_put_delayed_ref(ref);
|
|
|
- cond_resched();
|
|
|
- goto again;
|
|
|
- } else {
|
|
|
- WARN_ON(1);
|
|
|
- }
|
|
|
- node = rb_next(node);
|
|
|
- }
|
|
|
- spin_unlock(&delayed_refs->lock);
|
|
|
+ btrfs_put_delayed_ref_head(head);
|
|
|
cond_resched();
|
|
|
goto again;
|
|
|
}
|
|
@@ -3169,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
|
|
struct btrfs_delayed_data_ref *data_ref;
|
|
|
struct btrfs_delayed_ref_root *delayed_refs;
|
|
|
struct btrfs_transaction *cur_trans;
|
|
|
+ struct rb_node *node;
|
|
|
int ret = 0;
|
|
|
|
|
|
cur_trans = root->fs_info->running_transaction;
|
|
@@ -3184,7 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
|
|
}
|
|
|
|
|
|
if (!mutex_trylock(&head->mutex)) {
|
|
|
- refcount_inc(&head->node.refs);
|
|
|
+ refcount_inc(&head->refs);
|
|
|
spin_unlock(&delayed_refs->lock);
|
|
|
|
|
|
btrfs_release_path(path);
|
|
@@ -3195,13 +3166,18 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
|
|
*/
|
|
|
mutex_lock(&head->mutex);
|
|
|
mutex_unlock(&head->mutex);
|
|
|
- btrfs_put_delayed_ref(&head->node);
|
|
|
+ btrfs_put_delayed_ref_head(head);
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
spin_unlock(&delayed_refs->lock);
|
|
|
|
|
|
spin_lock(&head->lock);
|
|
|
- list_for_each_entry(ref, &head->ref_list, list) {
|
|
|
+ /*
|
|
|
+ * XXX: We should replace this with a proper search function in the
|
|
|
+ * future.
|
|
|
+ */
|
|
|
+ for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
|
|
|
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
|
|
|
/* If it's a shared ref we know a cross reference exists */
|
|
|
if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
|
|
|
ret = 1;
|
|
@@ -3351,7 +3327,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|
|
int level;
|
|
|
int ret = 0;
|
|
|
int (*process_func)(struct btrfs_trans_handle *,
|
|
|
- struct btrfs_fs_info *,
|
|
|
+ struct btrfs_root *,
|
|
|
u64, u64, u64, u64, u64, u64);
|
|
|
|
|
|
|
|
@@ -3391,7 +3367,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|
|
|
|
|
num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
|
|
|
key.offset -= btrfs_file_extent_offset(buf, fi);
|
|
|
- ret = process_func(trans, fs_info, bytenr, num_bytes,
|
|
|
+ ret = process_func(trans, root, bytenr, num_bytes,
|
|
|
parent, ref_root, key.objectid,
|
|
|
key.offset);
|
|
|
if (ret)
|
|
@@ -3399,7 +3375,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|
|
} else {
|
|
|
bytenr = btrfs_node_blockptr(buf, i);
|
|
|
num_bytes = fs_info->nodesize;
|
|
|
- ret = process_func(trans, fs_info, bytenr, num_bytes,
|
|
|
+ ret = process_func(trans, root, bytenr, num_bytes,
|
|
|
parent, ref_root, level - 1, 0);
|
|
|
if (ret)
|
|
|
goto fail;
|
|
@@ -4843,7 +4819,6 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
|
|
|
static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
|
|
|
u64 orig, bool wait_ordered)
|
|
|
{
|
|
|
- struct btrfs_block_rsv *block_rsv;
|
|
|
struct btrfs_space_info *space_info;
|
|
|
struct btrfs_trans_handle *trans;
|
|
|
u64 delalloc_bytes;
|
|
@@ -4859,8 +4834,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
|
|
|
to_reclaim = items * EXTENT_SIZE_PER_ITEM;
|
|
|
|
|
|
trans = (struct btrfs_trans_handle *)current->journal_info;
|
|
|
- block_rsv = &fs_info->delalloc_block_rsv;
|
|
|
- space_info = block_rsv->space_info;
|
|
|
+ space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
|
|
|
|
|
delalloc_bytes = percpu_counter_sum_positive(
|
|
|
&fs_info->delalloc_bytes);
|
|
@@ -4919,6 +4893,13 @@ skip_async:
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+struct reserve_ticket {
|
|
|
+ u64 bytes;
|
|
|
+ int error;
|
|
|
+ struct list_head list;
|
|
|
+ wait_queue_head_t wait;
|
|
|
+};
|
|
|
+
|
|
|
/**
|
|
|
* may_commit_transaction - possibly commit the transaction if it's ok to
|
|
|
* @root - the root we're allocating for
|
|
@@ -4930,18 +4911,29 @@ skip_async:
|
|
|
* will return -ENOSPC.
|
|
|
*/
|
|
|
static int may_commit_transaction(struct btrfs_fs_info *fs_info,
|
|
|
- struct btrfs_space_info *space_info,
|
|
|
- u64 bytes, int force)
|
|
|
+ struct btrfs_space_info *space_info)
|
|
|
{
|
|
|
+ struct reserve_ticket *ticket = NULL;
|
|
|
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
|
|
|
struct btrfs_trans_handle *trans;
|
|
|
+ u64 bytes;
|
|
|
|
|
|
trans = (struct btrfs_trans_handle *)current->journal_info;
|
|
|
if (trans)
|
|
|
return -EAGAIN;
|
|
|
|
|
|
- if (force)
|
|
|
- goto commit;
|
|
|
+ spin_lock(&space_info->lock);
|
|
|
+ if (!list_empty(&space_info->priority_tickets))
|
|
|
+ ticket = list_first_entry(&space_info->priority_tickets,
|
|
|
+ struct reserve_ticket, list);
|
|
|
+ else if (!list_empty(&space_info->tickets))
|
|
|
+ ticket = list_first_entry(&space_info->tickets,
|
|
|
+ struct reserve_ticket, list);
|
|
|
+ bytes = (ticket) ? ticket->bytes : 0;
|
|
|
+ spin_unlock(&space_info->lock);
|
|
|
+
|
|
|
+ if (!bytes)
|
|
|
+ return 0;
|
|
|
|
|
|
/* See if there is enough pinned space to make this reservation */
|
|
|
if (percpu_counter_compare(&space_info->total_bytes_pinned,
|
|
@@ -4956,8 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
|
|
|
return -ENOSPC;
|
|
|
|
|
|
spin_lock(&delayed_rsv->lock);
|
|
|
+ if (delayed_rsv->size > bytes)
|
|
|
+ bytes = 0;
|
|
|
+ else
|
|
|
+ bytes -= delayed_rsv->size;
|
|
|
if (percpu_counter_compare(&space_info->total_bytes_pinned,
|
|
|
- bytes - delayed_rsv->size) < 0) {
|
|
|
+ bytes) < 0) {
|
|
|
spin_unlock(&delayed_rsv->lock);
|
|
|
return -ENOSPC;
|
|
|
}
|
|
@@ -4971,13 +4967,6 @@ commit:
|
|
|
return btrfs_commit_transaction(trans);
|
|
|
}
|
|
|
|
|
|
-struct reserve_ticket {
|
|
|
- u64 bytes;
|
|
|
- int error;
|
|
|
- struct list_head list;
|
|
|
- wait_queue_head_t wait;
|
|
|
-};
|
|
|
-
|
|
|
/*
|
|
|
* Try to flush some data based on policy set by @state. This is only advisory
|
|
|
* and may fail for various reasons. The caller is supposed to examine the
|
|
@@ -5027,8 +5016,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
|
|
ret = 0;
|
|
|
break;
|
|
|
case COMMIT_TRANS:
|
|
|
- ret = may_commit_transaction(fs_info, space_info,
|
|
|
- num_bytes, 0);
|
|
|
+ ret = may_commit_transaction(fs_info, space_info);
|
|
|
break;
|
|
|
default:
|
|
|
ret = -ENOSPC;
|
|
@@ -5582,11 +5570,12 @@ again:
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
|
+static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
|
struct btrfs_block_rsv *block_rsv,
|
|
|
struct btrfs_block_rsv *dest, u64 num_bytes)
|
|
|
{
|
|
|
struct btrfs_space_info *space_info = block_rsv->space_info;
|
|
|
+ u64 ret;
|
|
|
|
|
|
spin_lock(&block_rsv->lock);
|
|
|
if (num_bytes == (u64)-1)
|
|
@@ -5601,6 +5590,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
|
}
|
|
|
spin_unlock(&block_rsv->lock);
|
|
|
|
|
|
+ ret = num_bytes;
|
|
|
if (num_bytes > 0) {
|
|
|
if (dest) {
|
|
|
spin_lock(&dest->lock);
|
|
@@ -5620,6 +5610,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
|
space_info_add_old_bytes(fs_info, space_info,
|
|
|
num_bytes);
|
|
|
}
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
|
|
@@ -5643,6 +5634,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
|
|
|
rsv->type = type;
|
|
|
}
|
|
|
|
|
|
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_block_rsv *rsv,
|
|
|
+ unsigned short type)
|
|
|
+{
|
|
|
+ btrfs_init_block_rsv(rsv, type);
|
|
|
+ rsv->space_info = __find_space_info(fs_info,
|
|
|
+ BTRFS_BLOCK_GROUP_METADATA);
|
|
|
+}
|
|
|
+
|
|
|
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
|
|
unsigned short type)
|
|
|
{
|
|
@@ -5652,9 +5652,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
|
|
if (!block_rsv)
|
|
|
return NULL;
|
|
|
|
|
|
- btrfs_init_block_rsv(block_rsv, type);
|
|
|
- block_rsv->space_info = __find_space_info(fs_info,
|
|
|
- BTRFS_BLOCK_GROUP_METADATA);
|
|
|
+ btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
|
|
|
return block_rsv;
|
|
|
}
|
|
|
|
|
@@ -5737,6 +5735,66 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * btrfs_inode_rsv_refill - refill the inode block rsv.
|
|
|
+ * @inode - the inode we are refilling.
|
|
|
+ * @flush - the flushing restriction.
|
|
|
+ *
|
|
|
+ * Essentially the same as btrfs_block_rsv_refill, except it uses the
|
|
|
+ * block_rsv->size as the minimum size. We'll either refill the missing amount
|
|
|
+ * or return if we already have enough space. This will also handle the reserve
|
|
|
+ * tracepoint for the reserved amount.
|
|
|
+ */
|
|
|
+int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
|
|
|
+ enum btrfs_reserve_flush_enum flush)
|
|
|
+{
|
|
|
+ struct btrfs_root *root = inode->root;
|
|
|
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
|
+ u64 num_bytes = 0;
|
|
|
+ int ret = -ENOSPC;
|
|
|
+
|
|
|
+ spin_lock(&block_rsv->lock);
|
|
|
+ if (block_rsv->reserved < block_rsv->size)
|
|
|
+ num_bytes = block_rsv->size - block_rsv->reserved;
|
|
|
+ spin_unlock(&block_rsv->lock);
|
|
|
+
|
|
|
+ if (num_bytes == 0)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
|
|
+ if (!ret) {
|
|
|
+ block_rsv_add_bytes(block_rsv, num_bytes, 0);
|
|
|
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
|
|
|
+ btrfs_ino(inode), num_bytes, 1);
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * btrfs_inode_rsv_release - release any excessive reservation.
|
|
|
+ * @inode - the inode we need to release from.
|
|
|
+ *
|
|
|
+ * This is the same as btrfs_block_rsv_release, except that it handles the
|
|
|
+ * tracepoint for the reservation.
|
|
|
+ */
|
|
|
+void btrfs_inode_rsv_release(struct btrfs_inode *inode)
|
|
|
+{
|
|
|
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
|
|
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
|
|
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
|
+ u64 released = 0;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Since we statically set the block_rsv->size we just want to say we
|
|
|
+ * are releasing 0 bytes, and then we'll just get the reservation over
|
|
|
+ * the size free'd.
|
|
|
+ */
|
|
|
+ released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
|
|
|
+ if (released > 0)
|
|
|
+ trace_btrfs_space_reservation(fs_info, "delalloc",
|
|
|
+ btrfs_ino(inode), released, 0);
|
|
|
+}
|
|
|
+
|
|
|
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
|
|
struct btrfs_block_rsv *block_rsv,
|
|
|
u64 num_bytes)
|
|
@@ -5808,7 +5866,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
|
|
|
|
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
|
|
fs_info->global_block_rsv.space_info = space_info;
|
|
|
- fs_info->delalloc_block_rsv.space_info = space_info;
|
|
|
fs_info->trans_block_rsv.space_info = space_info;
|
|
|
fs_info->empty_block_rsv.space_info = space_info;
|
|
|
fs_info->delayed_block_rsv.space_info = space_info;
|
|
@@ -5828,8 +5885,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
|
{
|
|
|
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
|
|
|
(u64)-1);
|
|
|
- WARN_ON(fs_info->delalloc_block_rsv.size > 0);
|
|
|
- WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
|
|
|
WARN_ON(fs_info->trans_block_rsv.size > 0);
|
|
|
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
|
|
|
WARN_ON(fs_info->chunk_block_rsv.size > 0);
|
|
@@ -5841,12 +5896,15 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
|
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_fs_info *fs_info)
|
|
|
{
|
|
|
- if (!trans->block_rsv)
|
|
|
+ if (!trans->block_rsv) {
|
|
|
+ ASSERT(!trans->bytes_reserved);
|
|
|
return;
|
|
|
+ }
|
|
|
|
|
|
if (!trans->bytes_reserved)
|
|
|
return;
|
|
|
|
|
|
+ ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
|
|
|
trace_btrfs_space_reservation(fs_info, "transaction",
|
|
|
trans->transid, trans->bytes_reserved, 0);
|
|
|
btrfs_block_rsv_release(fs_info, trans->block_rsv,
|
|
@@ -5968,104 +6026,37 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
|
|
|
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
|
|
|
}
|
|
|
|
|
|
-/**
|
|
|
- * drop_outstanding_extent - drop an outstanding extent
|
|
|
- * @inode: the inode we're dropping the extent for
|
|
|
- * @num_bytes: the number of bytes we're releasing.
|
|
|
- *
|
|
|
- * This is called when we are freeing up an outstanding extent, either called
|
|
|
- * after an error or after an extent is written. This will return the number of
|
|
|
- * reserved extents that need to be freed. This must be called with
|
|
|
- * BTRFS_I(inode)->lock held.
|
|
|
- */
|
|
|
-static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
|
|
|
- u64 num_bytes)
|
|
|
-{
|
|
|
- unsigned drop_inode_space = 0;
|
|
|
- unsigned dropped_extents = 0;
|
|
|
- unsigned num_extents;
|
|
|
-
|
|
|
- num_extents = count_max_extents(num_bytes);
|
|
|
- ASSERT(num_extents);
|
|
|
- ASSERT(inode->outstanding_extents >= num_extents);
|
|
|
- inode->outstanding_extents -= num_extents;
|
|
|
-
|
|
|
- if (inode->outstanding_extents == 0 &&
|
|
|
- test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
|
|
|
- &inode->runtime_flags))
|
|
|
- drop_inode_space = 1;
|
|
|
-
|
|
|
- /*
|
|
|
- * If we have more or the same amount of outstanding extents than we have
|
|
|
- * reserved then we need to leave the reserved extents count alone.
|
|
|
- */
|
|
|
- if (inode->outstanding_extents >= inode->reserved_extents)
|
|
|
- return drop_inode_space;
|
|
|
-
|
|
|
- dropped_extents = inode->reserved_extents - inode->outstanding_extents;
|
|
|
- inode->reserved_extents -= dropped_extents;
|
|
|
- return dropped_extents + drop_inode_space;
|
|
|
-}
|
|
|
-
|
|
|
-/**
|
|
|
- * calc_csum_metadata_size - return the amount of metadata space that must be
|
|
|
- * reserved/freed for the given bytes.
|
|
|
- * @inode: the inode we're manipulating
|
|
|
- * @num_bytes: the number of bytes in question
|
|
|
- * @reserve: 1 if we are reserving space, 0 if we are freeing space
|
|
|
- *
|
|
|
- * This adjusts the number of csum_bytes in the inode and then returns the
|
|
|
- * correct amount of metadata that must either be reserved or freed. We
|
|
|
- * calculate how many checksums we can fit into one leaf and then divide the
|
|
|
- * number of bytes that will need to be checksumed by this value to figure out
|
|
|
- * how many checksums will be required. If we are adding bytes then the number
|
|
|
- * may go up and we will return the number of additional bytes that must be
|
|
|
- * reserved. If it is going down we will return the number of bytes that must
|
|
|
- * be freed.
|
|
|
- *
|
|
|
- * This must be called with BTRFS_I(inode)->lock held.
|
|
|
- */
|
|
|
-static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes,
|
|
|
- int reserve)
|
|
|
+static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_inode *inode)
|
|
|
{
|
|
|
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
|
|
|
- u64 old_csums, num_csums;
|
|
|
-
|
|
|
- if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0)
|
|
|
- return 0;
|
|
|
-
|
|
|
- old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
|
|
|
- if (reserve)
|
|
|
- inode->csum_bytes += num_bytes;
|
|
|
- else
|
|
|
- inode->csum_bytes -= num_bytes;
|
|
|
- num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
|
|
|
-
|
|
|
- /* No change, no need to reserve more */
|
|
|
- if (old_csums == num_csums)
|
|
|
- return 0;
|
|
|
+ struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
|
+ u64 reserve_size = 0;
|
|
|
+ u64 csum_leaves;
|
|
|
+ unsigned outstanding_extents;
|
|
|
|
|
|
- if (reserve)
|
|
|
- return btrfs_calc_trans_metadata_size(fs_info,
|
|
|
- num_csums - old_csums);
|
|
|
+ lockdep_assert_held(&inode->lock);
|
|
|
+ outstanding_extents = inode->outstanding_extents;
|
|
|
+ if (outstanding_extents)
|
|
|
+ reserve_size = btrfs_calc_trans_metadata_size(fs_info,
|
|
|
+ outstanding_extents + 1);
|
|
|
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
|
|
|
+ inode->csum_bytes);
|
|
|
+ reserve_size += btrfs_calc_trans_metadata_size(fs_info,
|
|
|
+ csum_leaves);
|
|
|
|
|
|
- return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums);
|
|
|
+ spin_lock(&block_rsv->lock);
|
|
|
+ block_rsv->size = reserve_size;
|
|
|
+ spin_unlock(&block_rsv->lock);
|
|
|
}
|
|
|
|
|
|
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
|
|
{
|
|
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
|
|
|
struct btrfs_root *root = inode->root;
|
|
|
- struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
|
|
|
- u64 to_reserve = 0;
|
|
|
- u64 csum_bytes;
|
|
|
unsigned nr_extents;
|
|
|
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
|
|
|
int ret = 0;
|
|
|
bool delalloc_lock = true;
|
|
|
- u64 to_free = 0;
|
|
|
- unsigned dropped;
|
|
|
- bool release_extra = false;
|
|
|
|
|
|
/* If we are a free space inode we need to not flush since we will be in
|
|
|
* the middle of a transaction commit. We also don't need the delalloc
|
|
@@ -6091,19 +6082,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
|
|
|
|
|
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
|
|
|
|
|
|
+ /* Add our new extents and calculate the new rsv size. */
|
|
|
spin_lock(&inode->lock);
|
|
|
nr_extents = count_max_extents(num_bytes);
|
|
|
- inode->outstanding_extents += nr_extents;
|
|
|
-
|
|
|
- nr_extents = 0;
|
|
|
- if (inode->outstanding_extents > inode->reserved_extents)
|
|
|
- nr_extents += inode->outstanding_extents -
|
|
|
- inode->reserved_extents;
|
|
|
-
|
|
|
- /* We always want to reserve a slot for updating the inode. */
|
|
|
- to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
|
|
|
- to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
|
|
|
- csum_bytes = inode->csum_bytes;
|
|
|
+ btrfs_mod_outstanding_extents(inode, nr_extents);
|
|
|
+ inode->csum_bytes += num_bytes;
|
|
|
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
|
|
spin_unlock(&inode->lock);
|
|
|
|
|
|
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
|
|
@@ -6113,92 +6097,26 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
|
|
goto out_fail;
|
|
|
}
|
|
|
|
|
|
- ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
|
|
|
+ ret = btrfs_inode_rsv_refill(inode, flush);
|
|
|
if (unlikely(ret)) {
|
|
|
btrfs_qgroup_free_meta(root,
|
|
|
nr_extents * fs_info->nodesize);
|
|
|
goto out_fail;
|
|
|
}
|
|
|
|
|
|
- spin_lock(&inode->lock);
|
|
|
- if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
|
|
|
- &inode->runtime_flags)) {
|
|
|
- to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
|
|
|
- release_extra = true;
|
|
|
- }
|
|
|
- inode->reserved_extents += nr_extents;
|
|
|
- spin_unlock(&inode->lock);
|
|
|
-
|
|
|
if (delalloc_lock)
|
|
|
mutex_unlock(&inode->delalloc_mutex);
|
|
|
-
|
|
|
- if (to_reserve)
|
|
|
- trace_btrfs_space_reservation(fs_info, "delalloc",
|
|
|
- btrfs_ino(inode), to_reserve, 1);
|
|
|
- if (release_extra)
|
|
|
- btrfs_block_rsv_release(fs_info, block_rsv,
|
|
|
- btrfs_calc_trans_metadata_size(fs_info, 1));
|
|
|
return 0;
|
|
|
|
|
|
out_fail:
|
|
|
spin_lock(&inode->lock);
|
|
|
- dropped = drop_outstanding_extent(inode, num_bytes);
|
|
|
- /*
|
|
|
- * If the inodes csum_bytes is the same as the original
|
|
|
- * csum_bytes then we know we haven't raced with any free()ers
|
|
|
- * so we can just reduce our inodes csum bytes and carry on.
|
|
|
- */
|
|
|
- if (inode->csum_bytes == csum_bytes) {
|
|
|
- calc_csum_metadata_size(inode, num_bytes, 0);
|
|
|
- } else {
|
|
|
- u64 orig_csum_bytes = inode->csum_bytes;
|
|
|
- u64 bytes;
|
|
|
-
|
|
|
- /*
|
|
|
- * This is tricky, but first we need to figure out how much we
|
|
|
- * freed from any free-ers that occurred during this
|
|
|
- * reservation, so we reset ->csum_bytes to the csum_bytes
|
|
|
- * before we dropped our lock, and then call the free for the
|
|
|
- * number of bytes that were freed while we were trying our
|
|
|
- * reservation.
|
|
|
- */
|
|
|
- bytes = csum_bytes - inode->csum_bytes;
|
|
|
- inode->csum_bytes = csum_bytes;
|
|
|
- to_free = calc_csum_metadata_size(inode, bytes, 0);
|
|
|
-
|
|
|
-
|
|
|
- /*
|
|
|
- * Now we need to see how much we would have freed had we not
|
|
|
- * been making this reservation and our ->csum_bytes were not
|
|
|
- * artificially inflated.
|
|
|
- */
|
|
|
- inode->csum_bytes = csum_bytes - num_bytes;
|
|
|
- bytes = csum_bytes - orig_csum_bytes;
|
|
|
- bytes = calc_csum_metadata_size(inode, bytes, 0);
|
|
|
-
|
|
|
- /*
|
|
|
- * Now reset ->csum_bytes to what it should be. If bytes is
|
|
|
- * more than to_free then we would have freed more space had we
|
|
|
- * not had an artificially high ->csum_bytes, so we need to free
|
|
|
- * the remainder. If bytes is the same or less then we don't
|
|
|
- * need to do anything, the other free-ers did the correct
|
|
|
- * thing.
|
|
|
- */
|
|
|
- inode->csum_bytes = orig_csum_bytes - num_bytes;
|
|
|
- if (bytes > to_free)
|
|
|
- to_free = bytes - to_free;
|
|
|
- else
|
|
|
- to_free = 0;
|
|
|
- }
|
|
|
+ nr_extents = count_max_extents(num_bytes);
|
|
|
+ btrfs_mod_outstanding_extents(inode, -nr_extents);
|
|
|
+ inode->csum_bytes -= num_bytes;
|
|
|
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
|
|
spin_unlock(&inode->lock);
|
|
|
- if (dropped)
|
|
|
- to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
|
|
|
|
|
|
- if (to_free) {
|
|
|
- btrfs_block_rsv_release(fs_info, block_rsv, to_free);
|
|
|
- trace_btrfs_space_reservation(fs_info, "delalloc",
|
|
|
- btrfs_ino(inode), to_free, 0);
|
|
|
- }
|
|
|
+ btrfs_inode_rsv_release(inode);
|
|
|
if (delalloc_lock)
|
|
|
mutex_unlock(&inode->delalloc_mutex);
|
|
|
return ret;
|
|
@@ -6206,36 +6124,55 @@ out_fail:
|
|
|
|
|
|
/**
|
|
|
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
|
|
|
- * @inode: the inode to release the reservation for
|
|
|
- * @num_bytes: the number of bytes we're releasing
|
|
|
+ * @inode: the inode to release the reservation for.
|
|
|
+ * @num_bytes: the number of bytes we are releasing.
|
|
|
*
|
|
|
* This will release the metadata reservation for an inode. This can be called
|
|
|
* once we complete IO for a given set of bytes to release their metadata
|
|
|
- * reservations.
|
|
|
+ * reservations, or on error for the same reason.
|
|
|
*/
|
|
|
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
|
|
{
|
|
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
|
|
|
- u64 to_free = 0;
|
|
|
- unsigned dropped;
|
|
|
|
|
|
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
|
|
|
spin_lock(&inode->lock);
|
|
|
- dropped = drop_outstanding_extent(inode, num_bytes);
|
|
|
-
|
|
|
- if (num_bytes)
|
|
|
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
|
|
|
+ inode->csum_bytes -= num_bytes;
|
|
|
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
|
|
spin_unlock(&inode->lock);
|
|
|
- if (dropped > 0)
|
|
|
- to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
|
|
|
|
|
|
if (btrfs_is_testing(fs_info))
|
|
|
return;
|
|
|
|
|
|
- trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
|
|
|
- to_free, 0);
|
|
|
+ btrfs_inode_rsv_release(inode);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * btrfs_delalloc_release_extents - release our outstanding_extents
|
|
|
+ * @inode: the inode to balance the reservation for.
|
|
|
+ * @num_bytes: the number of bytes we originally reserved.
|
|
|
+ *
|
|
|
+ * When we reserve space we increase outstanding_extents for the extents we may
|
|
|
+ * add. Once we've set the range as delalloc or created our ordered extents we
|
|
|
+ * have outstanding_extents to track the real usage, so we use this to free our
|
|
|
+ * temporarily tracked outstanding_extents. This _must_ be used in conjunction
|
|
|
+ * with btrfs_delalloc_reserve_metadata.
|
|
|
+ */
|
|
|
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
|
|
|
+{
|
|
|
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
|
|
|
+ unsigned num_extents;
|
|
|
+
|
|
|
+ spin_lock(&inode->lock);
|
|
|
+ num_extents = count_max_extents(num_bytes);
|
|
|
+ btrfs_mod_outstanding_extents(inode, -num_extents);
|
|
|
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
|
|
+ spin_unlock(&inode->lock);
|
|
|
+
|
|
|
+ if (btrfs_is_testing(fs_info))
|
|
|
+ return;
|
|
|
|
|
|
- btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
|
|
|
+ btrfs_inode_rsv_release(inode);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -6282,10 +6219,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
|
|
|
* @inode: inode we're releasing space for
|
|
|
* @start: start position of the space already reserved
|
|
|
* @len: the len of the space already reserved
|
|
|
- *
|
|
|
- * This must be matched with a call to btrfs_delalloc_reserve_space. This is
|
|
|
- * called in the case that we don't need the metadata AND data reservations
|
|
|
- * anymore. So if there is an error or we insert an inline extent.
|
|
|
+ * @reserved: extent changeset passed on to btrfs_free_reserved_data_space()
|
|
|
*
|
|
|
* This function will release the metadata space that was not used and will
|
|
|
* decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
|
|
@@ -6293,7 +6227,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
|
|
|
* Also it will handle the qgroup reserved space.
|
|
|
*/
|
|
|
void btrfs_delalloc_release_space(struct inode *inode,
|
|
|
- struct extent_changeset *reserved, u64 start, u64 len)
|
|
|
+ struct extent_changeset *reserved,
|
|
|
+ u64 start, u64 len)
|
|
|
{
|
|
|
btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
|
|
|
btrfs_free_reserved_data_space(inode, reserved, start, len);
|
|
@@ -6958,7 +6893,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
|
|
BUG_ON(!is_data && refs_to_drop != 1);
|
|
|
|
|
|
if (is_data)
|
|
|
- skinny_metadata = 0;
|
|
|
+ skinny_metadata = false;
|
|
|
|
|
|
ret = lookup_extent_backref(trans, info, path, &iref,
|
|
|
bytenr, num_bytes, parent,
|
|
@@ -7213,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
|
|
|
goto out_delayed_unlock;
|
|
|
|
|
|
spin_lock(&head->lock);
|
|
|
- if (!list_empty(&head->ref_list))
|
|
|
+ if (!RB_EMPTY_ROOT(&head->ref_tree))
|
|
|
goto out;
|
|
|
|
|
|
if (head->extent_op) {
|
|
@@ -7234,9 +7169,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
|
|
|
* at this point we have a head with no other entries. Go
|
|
|
* ahead and process it.
|
|
|
*/
|
|
|
- head->node.in_tree = 0;
|
|
|
rb_erase(&head->href_node, &delayed_refs->href_root);
|
|
|
-
|
|
|
+ RB_CLEAR_NODE(&head->href_node);
|
|
|
atomic_dec(&delayed_refs->num_entries);
|
|
|
|
|
|
/*
|
|
@@ -7255,7 +7189,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
|
|
|
ret = 1;
|
|
|
|
|
|
mutex_unlock(&head->mutex);
|
|
|
- btrfs_put_delayed_ref(&head->node);
|
|
|
+ btrfs_put_delayed_ref_head(head);
|
|
|
return ret;
|
|
|
out:
|
|
|
spin_unlock(&head->lock);
|
|
@@ -7277,6 +7211,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
|
|
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
|
|
|
int old_ref_mod, new_ref_mod;
|
|
|
|
|
|
+ btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
|
|
|
+ root->root_key.objectid,
|
|
|
+ btrfs_header_level(buf), 0,
|
|
|
+ BTRFS_DROP_DELAYED_REF);
|
|
|
ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
|
|
|
buf->len, parent,
|
|
|
root->root_key.objectid,
|
|
@@ -7329,16 +7267,21 @@ out:
|
|
|
|
|
|
/* Can return -ENOMEM */
|
|
|
int btrfs_free_extent(struct btrfs_trans_handle *trans,
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_root *root,
|
|
|
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
|
|
|
u64 owner, u64 offset)
|
|
|
{
|
|
|
+ struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
int old_ref_mod, new_ref_mod;
|
|
|
int ret;
|
|
|
|
|
|
if (btrfs_is_testing(fs_info))
|
|
|
return 0;
|
|
|
|
|
|
+ if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
|
|
|
+ btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
|
|
|
+ root_objectid, owner, offset,
|
|
|
+ BTRFS_DROP_DELAYED_REF);
|
|
|
|
|
|
/*
|
|
|
* tree log blocks never actually go into the extent allocation
|
|
@@ -8306,17 +8249,22 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
|
|
}
|
|
|
|
|
|
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
|
|
- u64 root_objectid, u64 owner,
|
|
|
+ struct btrfs_root *root, u64 owner,
|
|
|
u64 offset, u64 ram_bytes,
|
|
|
struct btrfs_key *ins)
|
|
|
{
|
|
|
- struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
|
+ struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
int ret;
|
|
|
|
|
|
- BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
|
|
|
+ BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
|
|
|
+
|
|
|
+ btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
|
|
|
+ root->root_key.objectid, owner, offset,
|
|
|
+ BTRFS_ADD_DELAYED_EXTENT);
|
|
|
|
|
|
ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
|
|
|
- ins->offset, 0, root_objectid, owner,
|
|
|
+ ins->offset, 0,
|
|
|
+ root->root_key.objectid, owner,
|
|
|
offset, ram_bytes,
|
|
|
BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
|
|
|
return ret;
|
|
@@ -8538,6 +8486,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
|
|
extent_op->is_data = false;
|
|
|
extent_op->level = level;
|
|
|
|
|
|
+ btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
|
|
|
+ root_objectid, level, 0,
|
|
|
+ BTRFS_ADD_DELAYED_EXTENT);
|
|
|
ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
|
|
|
ins.offset, parent,
|
|
|
root_objectid, level,
|
|
@@ -8894,7 +8845,7 @@ skip:
|
|
|
ret);
|
|
|
}
|
|
|
}
|
|
|
- ret = btrfs_free_extent(trans, fs_info, bytenr, blocksize,
|
|
|
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize,
|
|
|
parent, root->root_key.objectid,
|
|
|
level - 1, 0);
|
|
|
if (ret)
|
|
@@ -9311,7 +9262,7 @@ out:
|
|
|
* don't have it in the radix (like when we recover after a power fail
|
|
|
* or unmount) so we don't leak memory.
|
|
|
*/
|
|
|
- if (!for_reloc && root_dropped == false)
|
|
|
+ if (!for_reloc && !root_dropped)
|
|
|
btrfs_add_dead_root(root);
|
|
|
if (err && err != -EAGAIN)
|
|
|
btrfs_handle_fs_error(fs_info, err, NULL);
|
|
@@ -9968,9 +9919,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static void __link_block_group(struct btrfs_space_info *space_info,
|
|
|
- struct btrfs_block_group_cache *cache)
|
|
|
+static void link_block_group(struct btrfs_block_group_cache *cache)
|
|
|
{
|
|
|
+ struct btrfs_space_info *space_info = cache->space_info;
|
|
|
int index = get_block_group_index(cache);
|
|
|
bool first = false;
|
|
|
|
|
@@ -10178,7 +10129,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
|
|
|
|
|
|
cache->space_info = space_info;
|
|
|
|
|
|
- __link_block_group(space_info, cache);
|
|
|
+ link_block_group(cache);
|
|
|
|
|
|
set_avail_alloc_bits(info, cache->flags);
|
|
|
if (btrfs_chunk_readonly(info, cache->key.objectid)) {
|
|
@@ -10337,7 +10288,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
|
|
cache->bytes_super, &cache->space_info);
|
|
|
update_global_block_rsv(fs_info);
|
|
|
|
|
|
- __link_block_group(cache->space_info, cache);
|
|
|
+ link_block_group(cache);
|
|
|
|
|
|
list_add_tail(&cache->bg_list, &trans->new_bgs);
|
|
|
|
|
@@ -10387,6 +10338,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|
|
* remove it.
|
|
|
*/
|
|
|
free_excluded_extents(fs_info, block_group);
|
|
|
+ btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
|
|
|
+ block_group->key.offset);
|
|
|
|
|
|
memcpy(&key, &block_group->key, sizeof(key));
|
|
|
index = get_block_group_index(block_group);
|