@@ -111,6 +111,16 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, int reserved);
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                   struct btrfs_space_info *space_info,
+                                   u64 orig_bytes,
+                                   enum btrfs_reserve_flush_enum flush);
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes);
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -3913,6 +3923,7 @@ static const char *alloc_name(u64 flags)
 
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                              u64 total_bytes, u64 bytes_used,
+                             u64 bytes_readonly,
                              struct btrfs_space_info **space_info)
 {
        struct btrfs_space_info *found;
@@ -3933,8 +3944,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
+               found->bytes_readonly += bytes_readonly;
                if (total_bytes > 0)
                        found->full = 0;
+               space_info_add_new_bytes(info, found, total_bytes -
+                                        bytes_used - bytes_readonly);
                spin_unlock(&found->lock);
                *space_info = found;
                return 0;
@@ -3960,7 +3974,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->disk_used = bytes_used * factor;
        found->bytes_pinned = 0;
        found->bytes_reserved = 0;
-       found->bytes_readonly = 0;
+       found->bytes_readonly = bytes_readonly;
        found->bytes_may_use = 0;
        found->full = 0;
        found->max_extent_size = 0;
@@ -3969,6 +3983,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->flush = 0;
        init_waitqueue_head(&found->wait);
        INIT_LIST_HEAD(&found->ro_bgs);
+       INIT_LIST_HEAD(&found->tickets);
+       INIT_LIST_HEAD(&found->priority_tickets);
 
        ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
                                   info->space_info_kobj, "%s",
@@ -4470,7 +4486,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        space_info = __find_space_info(extent_root->fs_info, flags);
        if (!space_info) {
                ret = update_space_info(extent_root->fs_info, flags,
-                                       0, 0, &space_info);
+                                       0, 0, 0, &space_info);
                BUG_ON(ret); /* -ENOMEM */
        }
        BUG_ON(!space_info); /* Logic error */
@@ -4582,12 +4598,19 @@ static int can_overcommit(struct btrfs_root *root,
                          struct btrfs_space_info *space_info, u64 bytes,
                          enum btrfs_reserve_flush_enum flush)
 {
-       struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
-       u64 profile = btrfs_get_alloc_profile(root, 0);
+       struct btrfs_block_rsv *global_rsv;
+       u64 profile;
        u64 space_size;
        u64 avail;
        u64 used;
 
+       /* Don't overcommit when in mixed mode. */
+       if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+               return 0;
+
+       BUG_ON(root->fs_info == NULL);
+       global_rsv = &root->fs_info->global_block_rsv;
+       profile = btrfs_get_alloc_profile(root, 0);
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly;
 
@@ -4739,6 +4762,11 @@ skip_async:
                        spin_unlock(&space_info->lock);
                        break;
                }
+               if (list_empty(&space_info->tickets) &&
+                   list_empty(&space_info->priority_tickets)) {
+                       spin_unlock(&space_info->lock);
+                       break;
+               }
                spin_unlock(&space_info->lock);
 
                loops++;
@@ -4807,13 +4835,11 @@ commit:
        return btrfs_commit_transaction(trans, root);
 }
 
-enum flush_state {
-       FLUSH_DELAYED_ITEMS_NR = 1,
-       FLUSH_DELAYED_ITEMS = 2,
-       FLUSH_DELALLOC = 3,
-       FLUSH_DELALLOC_WAIT = 4,
-       ALLOC_CHUNK = 5,
-       COMMIT_TRANS = 6,
+struct reserve_ticket {
+       u64 bytes;
+       int error;
+       struct list_head list;
+       wait_queue_head_t wait;
 };
 
 static int flush_space(struct btrfs_root *root,
@@ -4866,6 +4892,8 @@ static int flush_space(struct btrfs_root *root,
                break;
        }
 
+       trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
+                               orig_bytes, state, ret);
        return ret;
 }
 
@@ -4873,17 +4901,22 @@ static inline u64
 btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                                 struct btrfs_space_info *space_info)
 {
+       struct reserve_ticket *ticket;
        u64 used;
        u64 expected;
-       u64 to_reclaim;
+       u64 to_reclaim = 0;
 
        to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-       spin_lock(&space_info->lock);
        if (can_overcommit(root, space_info, to_reclaim,
-                          BTRFS_RESERVE_FLUSH_ALL)) {
-               to_reclaim = 0;
-               goto out;
-       }
+                          BTRFS_RESERVE_FLUSH_ALL))
+               return 0;
+
+       list_for_each_entry(ticket, &space_info->tickets, list)
+               to_reclaim += ticket->bytes;
+       list_for_each_entry(ticket, &space_info->priority_tickets, list)
+               to_reclaim += ticket->bytes;
+       if (to_reclaim)
+               return to_reclaim;
 
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly +
@@ -4899,14 +4932,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
                to_reclaim = 0;
        to_reclaim = min(to_reclaim, space_info->bytes_may_use +
                                     space_info->bytes_reserved);
-out:
-       spin_unlock(&space_info->lock);
-
        return to_reclaim;
 }
 
 static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
-                                       struct btrfs_fs_info *fs_info, u64 used)
+                                       struct btrfs_root *root, u64 used)
 {
        u64 thresh = div_factor_fine(space_info->total_bytes, 98);
 
@@ -4914,73 +4944,177 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
        if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
                return 0;
 
-       return (used >= thresh && !btrfs_fs_closing(fs_info) &&
-               !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+       if (!btrfs_calc_reclaim_metadata_size(root, space_info))
+               return 0;
+
+       return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
+               !test_bit(BTRFS_FS_STATE_REMOUNTING,
+                         &root->fs_info->fs_state));
 }
 
-static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
-                                      struct btrfs_fs_info *fs_info,
-                                      int flush_state)
+static void wake_all_tickets(struct list_head *head)
 {
-       u64 used;
-
-       spin_lock(&space_info->lock);
-       /*
-        * We run out of space and have not got any free space via flush_space,
-        * so don't bother doing async reclaim.
-        */
-       if (flush_state > COMMIT_TRANS && space_info->full) {
-               spin_unlock(&space_info->lock);
-               return 0;
-       }
+       struct reserve_ticket *ticket;
 
-       used = space_info->bytes_used + space_info->bytes_reserved +
-              space_info->bytes_pinned + space_info->bytes_readonly +
-              space_info->bytes_may_use;
-       if (need_do_async_reclaim(space_info, fs_info, used)) {
-               spin_unlock(&space_info->lock);
-               return 1;
+       while (!list_empty(head)) {
+               ticket = list_first_entry(head, struct reserve_ticket, list);
+               list_del_init(&ticket->list);
+               ticket->error = -ENOSPC;
+               wake_up(&ticket->wait);
        }
-       spin_unlock(&space_info->lock);
-
-       return 0;
 }
 
+/*
+ * This is for normal flushers, we can wait all goddamned day if we want to. We
+ * will loop and continuously try to flush as long as we are making progress.
+ * We count progress as clearing off tickets each time we have to loop.
+ */
 static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 {
+       struct reserve_ticket *last_ticket = NULL;
        struct btrfs_fs_info *fs_info;
        struct btrfs_space_info *space_info;
        u64 to_reclaim;
        int flush_state;
+       int commit_cycles = 0;
 
        fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
        space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 
+       spin_lock(&space_info->lock);
        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
                                                      space_info);
-       if (!to_reclaim)
+       if (!to_reclaim) {
+               space_info->flush = 0;
+               spin_unlock(&space_info->lock);
                return;
+       }
+       last_ticket = list_first_entry(&space_info->tickets,
+                                      struct reserve_ticket, list);
+       spin_unlock(&space_info->lock);
 
        flush_state = FLUSH_DELAYED_ITEMS_NR;
+       do {
+               struct reserve_ticket *ticket;
+               int ret;
+
+               ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
+                                 to_reclaim, flush_state);
+               spin_lock(&space_info->lock);
+               if (list_empty(&space_info->tickets)) {
+                       space_info->flush = 0;
+                       spin_unlock(&space_info->lock);
+                       return;
+               }
+               to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                             space_info);
+               ticket = list_first_entry(&space_info->tickets,
+                                         struct reserve_ticket, list);
+               if (last_ticket == ticket) {
+                       flush_state++;
+               } else {
+                       last_ticket = ticket;
+                       flush_state = FLUSH_DELAYED_ITEMS_NR;
+                       if (commit_cycles)
+                               commit_cycles--;
+               }
+
+               if (flush_state > COMMIT_TRANS) {
+                       commit_cycles++;
+                       if (commit_cycles > 2) {
+                               wake_all_tickets(&space_info->tickets);
+                               space_info->flush = 0;
+                       } else {
+                               flush_state = FLUSH_DELAYED_ITEMS_NR;
+                       }
+               }
+               spin_unlock(&space_info->lock);
+       } while (flush_state <= COMMIT_TRANS);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+       INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
+static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
+                                           struct btrfs_space_info *space_info,
+                                           struct reserve_ticket *ticket)
+{
+       u64 to_reclaim;
+       int flush_state = FLUSH_DELAYED_ITEMS_NR;
+
+       spin_lock(&space_info->lock);
+       to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                     space_info);
+       if (!to_reclaim) {
+               spin_unlock(&space_info->lock);
+               return;
+       }
+       spin_unlock(&space_info->lock);
+
        do {
                flush_space(fs_info->fs_root, space_info, to_reclaim,
                            to_reclaim, flush_state);
                flush_state++;
-               if (!btrfs_need_do_async_reclaim(space_info, fs_info,
-                                                flush_state))
+               spin_lock(&space_info->lock);
+               if (ticket->bytes == 0) {
+                       spin_unlock(&space_info->lock);
                        return;
+               }
+               spin_unlock(&space_info->lock);
+
+               /*
+                * Priority flushers can't wait on delalloc without
+                * deadlocking.
+                */
+               if (flush_state == FLUSH_DELALLOC ||
+                   flush_state == FLUSH_DELALLOC_WAIT)
+                       flush_state = ALLOC_CHUNK;
        } while (flush_state < COMMIT_TRANS);
 }
 
-void btrfs_init_async_reclaim_work(struct work_struct *work)
+static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
+                              struct btrfs_space_info *space_info,
+                              struct reserve_ticket *ticket, u64 orig_bytes)
+
 {
-       INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+       DEFINE_WAIT(wait);
+       int ret = 0;
+
+       spin_lock(&space_info->lock);
+       while (ticket->bytes > 0 && ticket->error == 0) {
+               ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
+               if (ret) {
+                       ret = -EINTR;
+                       break;
+               }
+               spin_unlock(&space_info->lock);
+
+               schedule();
+
+               finish_wait(&ticket->wait, &wait);
+               spin_lock(&space_info->lock);
+       }
+       if (!ret)
+               ret = ticket->error;
+       if (!list_empty(&ticket->list))
+               list_del_init(&ticket->list);
+       if (ticket->bytes && ticket->bytes < orig_bytes) {
+               u64 num_bytes = orig_bytes - ticket->bytes;
+               space_info->bytes_may_use -= num_bytes;
+               trace_btrfs_space_reservation(fs_info, "space_info",
+                                             space_info->flags, num_bytes, 0);
+       }
+       spin_unlock(&space_info->lock);
+
+       return ret;
 }
 
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
+ * @space_info - the space info we want to allocate from
  * @orig_bytes - the number of bytes we want
  * @flush - whether or not we can flush to make our reservation
  *
@@ -4991,81 +5125,36 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
  * regain reservations will be made and this will fail if there is not enough
  * space already.
  */
-static int reserve_metadata_bytes(struct btrfs_root *root,
-                                 struct btrfs_block_rsv *block_rsv,
-                                 u64 orig_bytes,
-                                 enum btrfs_reserve_flush_enum flush)
+static int __reserve_metadata_bytes(struct btrfs_root *root,
+                                   struct btrfs_space_info *space_info,
+                                   u64 orig_bytes,
+                                   enum btrfs_reserve_flush_enum flush)
 {
-       struct btrfs_space_info *space_info = block_rsv->space_info;
+       struct reserve_ticket ticket;
        u64 used;
-       u64 num_bytes = orig_bytes;
-       int flush_state = FLUSH_DELAYED_ITEMS_NR;
        int ret = 0;
-       bool flushing = false;
 
-again:
-       ret = 0;
-       spin_lock(&space_info->lock);
-       /*
-        * We only want to wait if somebody other than us is flushing and we
-        * are actually allowed to flush all things.
-        */
-       while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
-              space_info->flush) {
-               spin_unlock(&space_info->lock);
-               /*
-                * If we have a trans handle we can't wait because the flusher
-                * may have to commit the transaction, which would mean we would
-                * deadlock since we are waiting for the flusher to finish, but
-                * hold the current transaction open.
-                */
-               if (current->journal_info)
-                       return -EAGAIN;
-               ret = wait_event_killable(space_info->wait, !space_info->flush);
-               /* Must have been killed, return */
-               if (ret)
-                       return -EINTR;
-
-               spin_lock(&space_info->lock);
-       }
+       ASSERT(orig_bytes);
+       ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
 
+       spin_lock(&space_info->lock);
        ret = -ENOSPC;
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly +
               space_info->bytes_may_use;
 
        /*
-        * The idea here is that we've not already over-reserved the block group
-        * then we can go ahead and save our reservation first and then start
-        * flushing if we need to. Otherwise if we've already overcommitted
-        * lets start flushing stuff first and then come back and try to make
-        * our reservation.
+        * If we have enough space then hooray, make our reservation and carry
+        * on. If not see if we can overcommit, and if we can, hooray carry on.
+        * If not things get more complicated.
         */
-       if (used <= space_info->total_bytes) {
-               if (used + orig_bytes <= space_info->total_bytes) {
-                       space_info->bytes_may_use += orig_bytes;
-                       trace_btrfs_space_reservation(root->fs_info,
-                               "space_info", space_info->flags, orig_bytes, 1);
-                       ret = 0;
-               } else {
-                       /*
-                        * Ok set num_bytes to orig_bytes since we aren't
-                        * overocmmitted, this way we only try and reclaim what
-                        * we need.
-                        */
-                       num_bytes = orig_bytes;
-               }
-       } else {
-               /*
-                * Ok we're over committed, set num_bytes to the overcommitted
-                * amount plus the amount of bytes that we need for this
-                * reservation.
-                */
-               num_bytes = used - space_info->total_bytes +
-                       (orig_bytes * 2);
-       }
-
-       if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
+       if (used + orig_bytes <= space_info->total_bytes) {
+               space_info->bytes_may_use += orig_bytes;
+               trace_btrfs_space_reservation(root->fs_info, "space_info",
+                                             space_info->flags, orig_bytes,
+                                             1);
+               ret = 0;
+       } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
                space_info->bytes_may_use += orig_bytes;
                trace_btrfs_space_reservation(root->fs_info, "space_info",
                                              space_info->flags, orig_bytes,
@@ -5074,16 +5163,31 @@ again:
        }
 
        /*
-        * Couldn't make our reservation, save our place so while we're trying
-        * to reclaim space we can actually use it instead of somebody else
-        * stealing it from us.
+        * If we couldn't make a reservation then setup our reservation ticket
+        * and kick the async worker if it's not already running.
         *
-        * We make the other tasks wait for the flush only when we can flush
-        * all things.
+        * If we are a priority flusher then we just need to add our ticket to
+        * the list and we will do our own flushing further down.
         */
        if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
-               flushing = true;
-               space_info->flush = 1;
+               ticket.bytes = orig_bytes;
+               ticket.error = 0;
+               init_waitqueue_head(&ticket.wait);
+               if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+                       list_add_tail(&ticket.list, &space_info->tickets);
+                       if (!space_info->flush) {
+                               space_info->flush = 1;
+                               trace_btrfs_trigger_flush(root->fs_info,
+                                                         space_info->flags,
+                                                         orig_bytes, flush,
+                                                         "enospc");
+                               queue_work(system_unbound_wq,
+                                          &root->fs_info->async_reclaim_work);
+                       }
+               } else {
+                       list_add_tail(&ticket.list,
+                                     &space_info->priority_tickets);
+               }
        } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
                used += orig_bytes;
                /*
@@ -5092,39 +5196,67 @@ again:
                 * the async reclaim as we will panic.
                 */
                if (!root->fs_info->log_root_recovering &&
-                   need_do_async_reclaim(space_info, root->fs_info, used) &&
-                   !work_busy(&root->fs_info->async_reclaim_work))
+                   need_do_async_reclaim(space_info, root, used) &&
+                   !work_busy(&root->fs_info->async_reclaim_work)) {
+                       trace_btrfs_trigger_flush(root->fs_info,
+                                                 space_info->flags,
+                                                 orig_bytes, flush,
+                                                 "preempt");
                        queue_work(system_unbound_wq,
                                   &root->fs_info->async_reclaim_work);
+               }
        }
        spin_unlock(&space_info->lock);
-
        if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
-               goto out;
+               return ret;
 
-       ret = flush_space(root, space_info, num_bytes, orig_bytes,
-                         flush_state);
-       flush_state++;
+       if (flush == BTRFS_RESERVE_FLUSH_ALL)
+               return wait_reserve_ticket(root->fs_info, space_info, &ticket,
+                                          orig_bytes);
 
-       /*
-        * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
-        * would happen. So skip delalloc flush.
-        */
-       if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-           (flush_state == FLUSH_DELALLOC ||
-            flush_state == FLUSH_DELALLOC_WAIT))
-               flush_state = ALLOC_CHUNK;
+       ret = 0;
+       priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
+       spin_lock(&space_info->lock);
+       if (ticket.bytes) {
+               if (ticket.bytes < orig_bytes) {
+                       u64 num_bytes = orig_bytes - ticket.bytes;
+                       space_info->bytes_may_use -= num_bytes;
+                       trace_btrfs_space_reservation(root->fs_info,
+                                       "space_info", space_info->flags,
+                                       num_bytes, 0);
 
-       if (!ret)
-               goto again;
-       else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
-                flush_state < COMMIT_TRANS)
-               goto again;
-       else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
-                flush_state <= COMMIT_TRANS)
-               goto again;
+               }
+               list_del_init(&ticket.list);
+               ret = -ENOSPC;
+       }
+       spin_unlock(&space_info->lock);
+       ASSERT(list_empty(&ticket.list));
+       return ret;
+}
 
-out:
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - whether or not we can flush to make our reservation
+ *
+ * This will reserve orgi_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
+ */
+static int reserve_metadata_bytes(struct btrfs_root *root,
+                                 struct btrfs_block_rsv *block_rsv,
+                                 u64 orig_bytes,
+                                 enum btrfs_reserve_flush_enum flush)
+{
+       int ret;
+
+       ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
+                                      flush);
        if (ret == -ENOSPC &&
            unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
                struct btrfs_block_rsv *global_rsv =
@@ -5137,13 +5269,8 @@ out:
        if (ret == -ENOSPC)
                trace_btrfs_space_reservation(root->fs_info,
                                              "space_info:enospc",
-                                             space_info->flags, orig_bytes, 1);
-       if (flushing) {
-               spin_lock(&space_info->lock);
-               space_info->flush = 0;
-               wake_up_all(&space_info->wait);
-               spin_unlock(&space_info->lock);
-       }
+                                             block_rsv->space_info->flags,
+                                             orig_bytes, 1);
        return ret;
 }
 
@@ -5219,6 +5346,108 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+/*
+ * This is for space we already have accounted in space_info->bytes_may_use, so
+ * basically when we're returning space from block_rsv's.
+ */
+static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes)
+{
+       struct reserve_ticket *ticket;
+       struct list_head *head;
+       u64 used;
+       enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+       bool check_overcommit = false;
+
+       spin_lock(&space_info->lock);
+       head = &space_info->priority_tickets;
+
+       /*
+        * If we are over our limit then we need to check and see if we can
+        * overcommit, and if we can't then we just need to free up our space
+        * and not satisfy any requests.
+        */
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_pinned + space_info->bytes_readonly +
+              space_info->bytes_may_use;
+       if (used - num_bytes >= space_info->total_bytes)
+               check_overcommit = true;
+again:
+       while (!list_empty(head) && num_bytes) {
+               ticket = list_first_entry(head, struct reserve_ticket,
+                                         list);
+               /*
+                * We use 0 bytes because this space is already reserved, so
+                * adding the ticket space would be a double count.
+                */
+               if (check_overcommit &&
+                   !can_overcommit(fs_info->extent_root, space_info, 0,
+                                   flush))
+                       break;
+               if (num_bytes >= ticket->bytes) {
+                       list_del_init(&ticket->list);
+                       num_bytes -= ticket->bytes;
+                       ticket->bytes = 0;
+                       wake_up(&ticket->wait);
+               } else {
+                       ticket->bytes -= num_bytes;
+                       num_bytes = 0;
+               }
+       }
+
+       if (num_bytes && head == &space_info->priority_tickets) {
+               head = &space_info->tickets;
+               flush = BTRFS_RESERVE_FLUSH_ALL;
+               goto again;
+       }
+       space_info->bytes_may_use -= num_bytes;
+       trace_btrfs_space_reservation(fs_info, "space_info",
+                                     space_info->flags, num_bytes, 0);
+       spin_unlock(&space_info->lock);
+}
+
+/*
+ * This is for newly allocated space that isn't accounted in
+ * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
+ * we use this helper.
+ */
+static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_space_info *space_info,
+                                    u64 num_bytes)
+{
+       struct reserve_ticket *ticket;
+       struct list_head *head = &space_info->priority_tickets;
+
+again:
+       while (!list_empty(head) && num_bytes) {
+               ticket = list_first_entry(head, struct reserve_ticket,
+                                         list);
+               if (num_bytes >= ticket->bytes) {
+                       trace_btrfs_space_reservation(fs_info, "space_info",
+                                                     space_info->flags,
+                                                     ticket->bytes, 1);
+                       list_del_init(&ticket->list);
+                       num_bytes -= ticket->bytes;
+                       space_info->bytes_may_use += ticket->bytes;
+                       ticket->bytes = 0;
+                       wake_up(&ticket->wait);
+               } else {
+                       trace_btrfs_space_reservation(fs_info, "space_info",
+                                                     space_info->flags,
+                                                     num_bytes, 1);
+                       space_info->bytes_may_use += num_bytes;
+                       ticket->bytes -= num_bytes;
+                       num_bytes = 0;
+               }
+       }
+
+       if (num_bytes && head == &space_info->priority_tickets) {
+               head = &space_info->tickets;
+               goto again;
+       }
+}
+
 static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
                                    struct btrfs_block_rsv *dest, u64 num_bytes)
@@ -5253,18 +5482,15 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        }
                        spin_unlock(&dest->lock);
                }
-               if (num_bytes) {
-                       spin_lock(&space_info->lock);
-                       space_info->bytes_may_use -= num_bytes;
-                       trace_btrfs_space_reservation(fs_info, "space_info",
-                                       space_info->flags, num_bytes, 0);
-                       spin_unlock(&space_info->lock);
-               }
+               if (num_bytes)
+                       space_info_add_old_bytes(fs_info, space_info,
+                                                num_bytes);
        }
 }
 
-static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
-                                  struct btrfs_block_rsv *dst, u64 num_bytes)
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
+                           struct btrfs_block_rsv *dst, u64 num_bytes,
+                           int update_size)
 {
        int ret;
 
@@ -5272,7 +5498,7 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
        if (ret)
                return ret;
 
-       block_rsv_add_bytes(dst, num_bytes, 1);
+       block_rsv_add_bytes(dst, num_bytes, update_size);
        return 0;
 }
 
@@ -5379,13 +5605,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
-                           struct btrfs_block_rsv *dst_rsv,
-                           u64 num_bytes)
-{
-       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
 void btrfs_block_rsv_release(struct btrfs_root *root,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes)
@@ -5398,48 +5617,21 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
                                num_bytes);
 }
 
-/*
- * helper to calculate size of global block reservation.
- * the desired value is sum of space used by extent tree,
- * checksum tree and root tree
- */
-static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_space_info *sinfo;
-       u64 num_bytes;
-       u64 meta_used;
-       u64 data_used;
-       int csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
-       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
-       spin_lock(&sinfo->lock);
-       data_used = sinfo->bytes_used;
-       spin_unlock(&sinfo->lock);
-
-       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-       spin_lock(&sinfo->lock);
-       if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
-               data_used = 0;
-       meta_used = sinfo->bytes_used;
-       spin_unlock(&sinfo->lock);
-
-       num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
-                   csum_size * 2;
-       num_bytes += div_u64(data_used + meta_used, 50);
-
-       if (num_bytes * 3 > meta_used)
-               num_bytes = div_u64(meta_used, 3);
-
-       return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
-}
-
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
        struct btrfs_space_info *sinfo = block_rsv->space_info;
        u64 num_bytes;
 
-       num_bytes = calc_global_metadata_size(fs_info);
+       /*
+        * The global block rsv is based on the size of the extent tree, the
+        * checksum tree and the root tree. If the fs is empty we want to set
+        * it to a minimal amount for safety.
+        */
+       num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
+               btrfs_root_used(&fs_info->csum_root->root_item) +
+               btrfs_root_used(&fs_info->tree_root->root_item);
+       num_bytes = max_t(u64, num_bytes, SZ_16M);
 
        spin_lock(&sinfo->lock);
        spin_lock(&block_rsv->lock);
@@ -5554,7 +5746,13 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
+       /*
+        * We always use trans->block_rsv here as we will have reserved space
+        * for our orphan when starting the transaction, using get_block_rsv()
+        * here will sometimes make us choose the wrong block rsv as we could be
+        * doing a reloc inode for a non refcounted root.
+        */
+       struct btrfs_block_rsv *src_rsv = trans->block_rsv;
        struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
 
        /*
@@ -5565,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
        u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        trace_btrfs_space_reservation(root->fs_info, "orphan",
                                      btrfs_ino(inode), num_bytes, 1);
-       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+       return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
 
 void btrfs_orphan_release_metadata(struct inode *inode)
@@ -5620,7 +5818,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                  BTRFS_RESERVE_FLUSH_ALL);
 
        if (ret == -ENOSPC && use_global_rsv)
-               ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
+               ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
        if (ret && *qgroup_reserved)
                btrfs_qgroup_free_meta(root, *qgroup_reserved);
@@ -5730,21 +5928,26 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        u64 to_reserve = 0;
        u64 csum_bytes;
        unsigned nr_extents = 0;
-       int extra_reserve = 0;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
        bool delalloc_lock = true;
        u64 to_free = 0;
        unsigned dropped;
+       bool release_extra = false;
 
        /* If we are a free space inode we need to not flush since we will be in
         * the middle of a transaction commit. We also don't need the delalloc
         * mutex since we won't race with anybody. We need this mostly to make
         * lockdep shut its filthy mouth.
+        *
+        * If we have a transaction open (can happen if we call truncate_block
+        * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
         */
        if (btrfs_is_free_space_inode(inode)) {
                flush = BTRFS_RESERVE_NO_FLUSH;
                delalloc_lock = false;
+       } else if (current->journal_info) {
+               flush = BTRFS_RESERVE_FLUSH_LIMIT;
        }
 
        if (flush != BTRFS_RESERVE_NO_FLUSH &&
@@ -5761,24 +5964,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                                           BTRFS_MAX_EXTENT_SIZE - 1,
                                           BTRFS_MAX_EXTENT_SIZE);
        BTRFS_I(inode)->outstanding_extents += nr_extents;
-       nr_extents = 0;
 
+       nr_extents = 0;
        if (BTRFS_I(inode)->outstanding_extents >
            BTRFS_I(inode)->reserved_extents)
-               nr_extents = BTRFS_I(inode)->outstanding_extents -
+               nr_extents += BTRFS_I(inode)->outstanding_extents -
                        BTRFS_I(inode)->reserved_extents;
 
-       /*
-        * Add an item to reserve for updating the inode when we complete the
-        * delalloc io.
-        */
-       if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                     &BTRFS_I(inode)->runtime_flags)) {
-               nr_extents++;
-               extra_reserve = 1;
-       }
-
-       to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+       /* We always want to reserve a slot for updating the inode. */
+       to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
        to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
        csum_bytes = BTRFS_I(inode)->csum_bytes;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -5790,17 +5984,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                goto out_fail;
        }
 
-       ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+       ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
        if (unlikely(ret)) {
                btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
                goto out_fail;
        }
 
        spin_lock(&BTRFS_I(inode)->lock);
-       if (extra_reserve) {
-               set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                       &BTRFS_I(inode)->runtime_flags);
-               nr_extents--;
+       if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
+                            &BTRFS_I(inode)->runtime_flags)) {
+               to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
+               release_extra = true;
        }
        BTRFS_I(inode)->reserved_extents += nr_extents;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -5811,8 +6005,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        if (to_reserve)
                trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                              btrfs_ino(inode), to_reserve, 1);
-       block_rsv_add_bytes(block_rsv, to_reserve, 1);
-
+       if (release_extra)
+               btrfs_block_rsv_release(root, block_rsv,
+                               btrfs_calc_trans_metadata_size(root,
+                                                              1));
        return 0;
 
 out_fail:
@@ -6044,6 +6240,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
 
+                       trace_btrfs_space_reservation(root->fs_info, "pinned",
+                                                     cache->space_info->flags,
+                                                     num_bytes, 1);
                        set_extent_dirty(info->pinned_extents,
                                         bytenr, bytenr + num_bytes - 1,
                                         GFP_NOFS | __GFP_NOFAIL);
@@ -6118,10 +6317,10 @@ static int pin_down_extent(struct btrfs_root *root,
        spin_unlock(&cache->lock);
        spin_unlock(&cache->space_info->lock);
 
+       trace_btrfs_space_reservation(root->fs_info, "pinned",
+                                     cache->space_info->flags, num_bytes, 1);
        set_extent_dirty(root->fs_info->pinned_extents, bytenr,
                         bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
-       if (reserved)
-               trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
        return 0;
 }
 
@@ -6476,6 +6675,9 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                spin_lock(&cache->lock);
                cache->pinned -= len;
                space_info->bytes_pinned -= len;
+
+               trace_btrfs_space_reservation(fs_info, "pinned",
+                                             space_info->flags, len, 0);
                space_info->max_extent_size = 0;
                percpu_counter_add(&space_info->total_bytes_pinned, -len);
                if (cache->ro) {
@@ -6483,17 +6685,29 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
                        readonly = true;
                }
                spin_unlock(&cache->lock);
-               if (!readonly && global_rsv->space_info == space_info) {
+               if (!readonly && return_free_space &&
+                   global_rsv->space_info == space_info) {
+                       u64 to_add = len;
+                       WARN_ON(!return_free_space);
                        spin_lock(&global_rsv->lock);
                        if (!global_rsv->full) {
-                               len = min(len, global_rsv->size -
-                                         global_rsv->reserved);
-                               global_rsv->reserved += len;
-                               space_info->bytes_may_use += len;
+                               to_add = min(len, global_rsv->size -
+                                            global_rsv->reserved);
+                               global_rsv->reserved += to_add;
+                               space_info->bytes_may_use += to_add;
                                if (global_rsv->reserved >= global_rsv->size)
                                        global_rsv->full = 1;
+                               trace_btrfs_space_reservation(fs_info,
+                                                             "space_info",
+                                                             space_info->flags,
+                                                             to_add, 1);
+                               len -= to_add;
                        }
                        spin_unlock(&global_rsv->lock);
+                       /* Add to any tickets we may have */
+                       if (len)
+                               space_info_add_new_bytes(fs_info, space_info,
+                                                        len);
                }
                spin_unlock(&space_info->lock);
        }
@@ -7782,12 +7996,10 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
                btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+               trace_btrfs_reserved_extent_free(root, start, len);
        }
 
        btrfs_put_block_group(cache);
-
-       trace_btrfs_reserved_extent_free(root, start, len);
-
        return ret;
 }
 
@@ -9791,13 +10003,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                space_info = list_entry(info->space_info.next,
                                        struct btrfs_space_info,
                                        list);
-               if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
-                       if (WARN_ON(space_info->bytes_pinned > 0 ||
+
+               /*
+                * Do not hide this behind enospc_debug, this is actually
+                * important and indicates a real bug if this happens.
+                */
+               if (WARN_ON(space_info->bytes_pinned > 0 ||
                            space_info->bytes_reserved > 0 ||
-                           space_info->bytes_may_use > 0)) {
-                               dump_space_info(space_info, 0, 0);
-                       }
-               }
+                           space_info->bytes_may_use > 0))
+                       dump_space_info(space_info, 0, 0);
                list_del(&space_info->list);
                for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
                        struct kobject *kobj;
@@ -10005,9 +10219,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        goto error;
                }
 
+               trace_btrfs_add_block_group(root->fs_info, cache, 0);
                ret = update_space_info(info, cache->flags, found_key.offset,
                                        btrfs_block_group_used(&cache->item),
-                                       &space_info);
+                                       cache->bytes_super, &space_info);
                if (ret) {
                        btrfs_remove_free_space_cache(cache);
                        spin_lock(&info->block_group_cache_lock);
@@ -10020,9 +10235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                }
 
                cache->space_info = space_info;
-               spin_lock(&cache->space_info->lock);
-               cache->space_info->bytes_readonly += cache->bytes_super;
-               spin_unlock(&cache->space_info->lock);
 
                __link_block_group(space_info, cache);
 
@@ -10114,7 +10326,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_root *extent_root;
        struct btrfs_block_group_cache *cache;
-
        extent_root = root->fs_info->extent_root;
 
        btrfs_set_log_full_commit(root->fs_info, trans);
@@ -10160,7 +10371,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
         * assigned to our block group, but don't update its counters just yet.
         * We want our bg to be added to the rbtree with its ->space_info set.
         */
-       ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+       ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
                                &cache->space_info);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
@@ -10179,8 +10390,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
         * Now that our block group has its ->space_info set and is inserted in
         * the rbtree, update the space info's counters.
         */
+       trace_btrfs_add_block_group(root->fs_info, cache, 1);
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
-                               &cache->space_info);
+                               cache->bytes_super, &cache->space_info);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
                spin_lock(&root->fs_info->block_group_cache_lock);
@@ -10193,16 +10405,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        }
        update_global_block_rsv(root->fs_info);
 
-       spin_lock(&cache->space_info->lock);
-       cache->space_info->bytes_readonly += cache->bytes_super;
-       spin_unlock(&cache->space_info->lock);
-
        __link_block_group(cache->space_info, cache);
 
        list_add_tail(&cache->bg_list, &trans->new_bgs);
 
        set_avail_alloc_bits(extent_root->fs_info, type);
-
        return 0;
 }
 
@@ -10747,21 +10954,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
                mixed = 1;
 
        flags = BTRFS_BLOCK_GROUP_SYSTEM;
-       ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+       ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        if (ret)
                goto out;
 
        if (mixed) {
                flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        } else {
                flags = BTRFS_BLOCK_GROUP_METADATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
                if (ret)
                        goto out;
 
                flags = BTRFS_BLOCK_GROUP_DATA;
-               ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+               ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
        }
 out:
        return ret;