@@ -27,7 +27,7 @@
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
 #include <linux/lockdep.h>
-#include "hash.h"
+#include <linux/crc32c.h>
 #include "tree-log.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -535,13 +535,11 @@ static noinline void caching_thread(struct btrfs_work *work)
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct btrfs_root *extent_root;
 	int ret;
 
 	caching_ctl = container_of(work, struct btrfs_caching_control, work);
 	block_group = caching_ctl->block_group;
 	fs_info = block_group->fs_info;
-	extent_root = fs_info->extent_root;
 
 	mutex_lock(&caching_ctl->mutex);
 	down_read(&fs_info->commit_root_sem);
@@ -1203,11 +1201,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
 	__le64 lenum;
 
 	lenum = cpu_to_le64(root_objectid);
-	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(owner);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(offset);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 
 	return ((u64)high_crc << 31) ^ (u64)low_crc;
 }
@@ -2652,9 +2650,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
  */
 static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-					     struct btrfs_fs_info *fs_info,
 					     unsigned long nr)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
@@ -2994,7 +2992,7 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 	if (trans->transid > async->transid)
 		goto end;
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, async->count);
+	ret = btrfs_run_delayed_refs(trans, async->count);
 	if (ret)
 		async->error = ret;
 end:
@@ -3053,8 +3051,9 @@ int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
  * Returns <0 on error and aborts the transaction
  */
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, unsigned long count)
+			   unsigned long count)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_head *head;
@@ -3078,7 +3077,7 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 	trans->can_flush_pending_bgs = false;
-	ret = __btrfs_run_delayed_refs(trans, fs_info, count);
+	ret = __btrfs_run_delayed_refs(trans, count);
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, ret);
 		return ret;
@@ -3086,7 +3085,7 @@ again:
 
 	if (run_all) {
 		if (!list_empty(&trans->new_bgs))
-			btrfs_create_pending_block_groups(trans, fs_info);
+			btrfs_create_pending_block_groups(trans);
 
 		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->href_root);
@@ -3660,9 +3659,9 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
  * the commit latency by getting rid of the easy block groups while
  * we're still allowing others to join the commit.
  */
-int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info)
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
@@ -3686,7 +3685,7 @@ again:
 	 * make sure all the block groups on our dirty list actually
 	 * exist
 	 */
-	btrfs_create_pending_block_groups(trans, fs_info);
+	btrfs_create_pending_block_groups(trans);
 
 	if (!path) {
 		path = btrfs_alloc_path();
@@ -3741,8 +3740,9 @@ again:
 				should_put = 0;
 
 				/*
-				 * the cache_write_mutex is protecting
-				 * the io_list
+				 * The cache_write_mutex is protecting the
+				 * io_list, also refer to the definition of
+				 * btrfs_transaction::io_bgs for more details
 				 */
 				list_add_tail(&cache->io_list, io);
 			} else {
@@ -3800,7 +3800,7 @@ again:
 	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
 	 */
-	ret = btrfs_run_delayed_refs(trans, fs_info, 0);
+	ret = btrfs_run_delayed_refs(trans, 0);
 	if (!ret && loops == 0) {
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3882,7 +3882,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		cache_save_setup(cache, trans, path);
 
 		if (!ret)
-			ret = btrfs_run_delayed_refs(trans, fs_info,
+			ret = btrfs_run_delayed_refs(trans,
 						     (unsigned long) -1);
 
 		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
@@ -3934,6 +3934,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
+	/*
+	 * Refer to the definition of io_bgs member for details why it's safe
+	 * to use it without any locking
+	 */
 	while (!list_empty(io)) {
 		cache = list_first_entry(io, struct btrfs_block_group_cache,
 					 io_list);
@@ -4332,8 +4336,7 @@ again:
 
 	/* commit the current transaction and try again */
 commit_trans:
-	if (need_commit &&
-	    !atomic_read(&fs_info->open_ioctl_trans)) {
+	if (need_commit) {
 		need_commit--;
 
 		if (need_commit > 0) {
@@ -4541,7 +4544,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
 	 * Needed because we can end up allocating a system chunk and for an
 	 * atomic and race free space reservation in the chunk block reserve.
 	 */
-	ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
+	lockdep_assert_held(&fs_info->chunk_mutex);
 
 	info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
 	spin_lock(&info->lock);
@@ -4602,11 +4605,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 
 	space_info = __find_space_info(fs_info, flags);
-	if (!space_info) {
-		ret = create_space_info(fs_info, flags, &space_info);
-		if (ret)
-			return ret;
-	}
+	ASSERT(space_info);
 
 again:
 	spin_lock(&space_info->lock);
@@ -4705,7 +4704,7 @@ out:
 	 */
 	if (trans->can_flush_pending_bgs &&
 	    trans->chunk_bytes_reserved >= (u64)SZ_2M) {
-		btrfs_create_pending_block_groups(trans, fs_info);
+		btrfs_create_pending_block_groups(trans);
 		btrfs_trans_release_chunk_metadata(trans);
 	}
 	return ret;
@@ -4826,7 +4825,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 	long time_left;
 	unsigned long nr_pages;
 	int loops;
-	enum btrfs_reserve_flush_enum flush;
 
 	/* Calc the number of the pages we need flush for space reservation */
 	items = calc_reclaim_items_nr(fs_info, to_reclaim);
@@ -4867,10 +4865,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 			   atomic_read(&fs_info->async_delalloc_pages) <=
 			   (int)max_reclaim);
 skip_async:
-		if (!trans)
-			flush = BTRFS_RESERVE_FLUSH_ALL;
-		else
-			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
 		if (list_empty(&space_info->tickets) &&
 		    list_empty(&space_info->priority_tickets)) {
@@ -4993,7 +4987,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 			ret = PTR_ERR(trans);
 			break;
 		}
-		ret = btrfs_run_delayed_items_nr(trans, fs_info, nr);
+		ret = btrfs_run_delayed_items_nr(trans, nr);
 		btrfs_end_transaction(trans);
 		break;
 	case FLUSH_DELALLOC:
@@ -5388,10 +5382,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
 		    !block_rsv_use_bytes(global_rsv, orig_bytes))
 			ret = 0;
 	}
-	if (ret == -ENOSPC)
+	if (ret == -ENOSPC) {
 		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
 					      block_rsv->space_info->flags,
 					      orig_bytes, 1);
+
+		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+			dump_space_info(fs_info, block_rsv->space_info,
+					orig_bytes, 0);
+	}
 	return ret;
 }
 
@@ -5760,6 +5759,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 	if (num_bytes == 0)
 		return 0;
 
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+	if (ret)
+		return ret;
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 0);
@@ -5772,11 +5774,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 /**
  * btrfs_inode_rsv_release - release any excessive reservation.
  * @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ * Unlike normal operation, qgroup meta reservation needs to know if we are
+ * freeing qgroup reservation or just converting it into per-trans. Normally
+ * @qgroup_free is true for error handling, and false for normal release.
  *
  * This is the same as btrfs_block_rsv_release, except that it handles the
  * tracepoint for the reservation.
  */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@@ -5792,6 +5798,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
 	if (released > 0)
 		trace_btrfs_space_reservation(fs_info, "delalloc",
 					      btrfs_ino(inode), released, 0);
+	if (qgroup_free)
+		btrfs_qgroup_free_meta_prealloc(inode->root, released);
+	else
+		btrfs_qgroup_convert_reserved_meta(inode->root, released);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5892,24 +5902,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
 }
 
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info)
-{
-	if (!trans->block_rsv) {
-		ASSERT(!trans->bytes_reserved);
-		return;
-	}
-
-	if (!trans->bytes_reserved)
-		return;
-
-	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
-	trace_btrfs_space_reservation(fs_info, "transaction",
-				      trans->transid, trans->bytes_reserved, 0);
-	btrfs_block_rsv_release(fs_info, trans->block_rsv,
-				trans->bytes_reserved);
-	trans->bytes_reserved = 0;
-}
 
 /*
  * To be called after all the new block groups attached to the transaction
@@ -5951,7 +5943,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
 	 */
 	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
+	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
 			num_bytes, 1);
 	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
@@ -5995,7 +5987,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
 		/* One for parent inode, two for dir entries */
 		num_bytes = 3 * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
+		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
 		if (ret)
 			return ret;
 	} else {
@@ -6014,7 +6006,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
 	if (ret && *qgroup_reserved)
-		btrfs_qgroup_free_meta(root, *qgroup_reserved);
+		btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
 
 	return ret;
 }
@@ -6051,7 +6043,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
 	unsigned nr_extents;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
@@ -6068,13 +6059,13 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_free_space_inode(inode)) {
 		flush = BTRFS_RESERVE_NO_FLUSH;
 		delalloc_lock = false;
-	} else if (current->journal_info) {
-		flush = BTRFS_RESERVE_FLUSH_LIMIT;
-	}
+	} else {
+		if (current->journal_info)
+			flush = BTRFS_RESERVE_FLUSH_LIMIT;
 
-	if (flush != BTRFS_RESERVE_NO_FLUSH &&
-	    btrfs_transaction_in_commit(fs_info))
-		schedule_timeout(1);
+		if (btrfs_transaction_in_commit(fs_info))
+			schedule_timeout(1);
+	}
 
 	if (delalloc_lock)
 		mutex_lock(&inode->delalloc_mutex);
@@ -6089,19 +6080,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-		ret = btrfs_qgroup_reserve_meta(root,
-				nr_extents * fs_info->nodesize, true);
-		if (ret)
-			goto out_fail;
-	}
-
 	ret = btrfs_inode_rsv_refill(inode, flush);
-	if (unlikely(ret)) {
-		btrfs_qgroup_free_meta(root,
-				       nr_extents * fs_info->nodesize);
+	if (unlikely(ret))
 		goto out_fail;
-	}
 
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
@@ -6115,7 +6096,7 @@ out_fail:
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, true);
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
 	return ret;
@@ -6125,12 +6106,14 @@ out_fail:
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
  * @inode: the inode to release the reservation for.
  * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
 *
 * This will release the metadata reservation for an inode. This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations, or on error for the same reason.
 */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				     bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
@@ -6143,13 +6126,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
 * btrfs_delalloc_release_extents - release our outstanding_extents
 * @inode: the inode to balance the reservation for.
 * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
 *
 * When we reserve space we increase outstanding_extents for the extents we may
 * add. Once we've set the range as delalloc or created our ordered extents we
@@ -6157,7 +6141,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
 * with btrfs_delalloc_reserve_metadata.
 */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+				    bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 	unsigned num_extents;
@@ -6171,7 +6156,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
@@ -6227,9 +6212,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 */
 void btrfs_delalloc_release_space(struct inode *inode,
 				  struct extent_changeset *reserved,
-				  u64 start, u64 len)
+				  u64 start, u64 len, bool qgroup_free)
 {
-	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
+	btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
 	btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
 
@@ -6783,9 +6768,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info)
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct list_head *deleted_bgs;
 	struct extent_io_tree *unpin;
@@ -7351,29 +7336,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 	return ret;
 }
 
-int __get_raid_index(u64 flags)
-{
-	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		return BTRFS_RAID_RAID10;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		return BTRFS_RAID_RAID1;
-	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		return BTRFS_RAID_DUP;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		return BTRFS_RAID_RAID0;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
-		return BTRFS_RAID_RAID5;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
-		return BTRFS_RAID_RAID6;
-
-	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
-}
-
-int get_block_group_index(struct btrfs_block_group_cache *cache)
-{
-	return __get_raid_index(cache->flags);
-}
-
 static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = "raid10",
 	[BTRFS_RAID_RAID1] = "raid1",
@@ -7488,7 +7450,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	u64 empty_cluster = 0;
 	struct btrfs_space_info *space_info;
 	int loop = 0;
-	int index = __get_raid_index(flags);
+	int index = btrfs_bg_flags_to_raid_index(flags);
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
 	bool use_cluster = true;
@@ -7574,7 +7536,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 			btrfs_put_block_group(block_group);
 			up_read(&space_info->groups_sem);
 		} else {
-			index = get_block_group_index(block_group);
+			index = btrfs_bg_flags_to_raid_index(
+							block_group->flags);
 			btrfs_lock_block_group(block_group, delalloc);
 			goto have_block_group;
 		}
@@ -7584,7 +7547,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	}
 search:
 	have_caching_bg = false;
-	if (index == 0 || index == __get_raid_index(flags))
+	if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags))
 		full_search = true;
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups[index],
@@ -7842,7 +7805,8 @@ checks:
 loop:
 		failed_cluster_refill = false;
 		failed_alloc = false;
-		BUG_ON(index != get_block_group_index(block_group));
+		BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
+		       index);
 		btrfs_release_block_group(block_group, delalloc);
 		cond_resched();
 	}
@@ -7996,6 +7960,51 @@ again:
 	up_read(&info->groups_sem);
 }
 
+/*
+ * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
+ *			  hole that is at least as big as @num_bytes.
+ *
+ * @root           -	The root that will contain this extent
+ *
+ * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
+ *			is used for accounting purposes. This value differs
+ *			from @num_bytes only in the case of compressed extents.
+ *
+ * @num_bytes      -	Number of bytes to allocate on-disk.
+ *
+ * @min_alloc_size -	Indicates the minimum amount of space that the
+ *			allocator should try to satisfy. In some cases
+ *			@num_bytes may be larger than what is required and if
+ *			the filesystem is fragmented then allocation fails.
+ *			However, the presence of @min_alloc_size gives a
+ *			chance to try and satisfy the smaller allocation.
+ *
+ * @empty_size     -	A hint that you plan on doing more COW. This is the
+ *			size in bytes the allocator should try to find free
+ *			next to the block it returns. This is just a hint and
+ *			may be ignored by the allocator.
+ *
+ * @hint_byte      -	Hint to the allocator to start searching above the byte
+ *			address passed. It might be ignored.
+ *
+ * @ins            -	This key is modified to record the found hole. It will
+ *			have the following values:
+ *			ins->objectid == start position
+ *			ins->flags = BTRFS_EXTENT_ITEM_KEY
+ *			ins->offset == the size of the hole.
+ *
+ * @is_data        -	Boolean flag indicating whether an extent is
+ *			allocated for data (true) or metadata (false)
+ *
+ * @delalloc       -	Boolean flag indicating whether this allocation is for
+ *			delalloc or not. If 'true' data_rwsem of block groups
+ *			is going to be acquired.
+ *
+ *
+ * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
+ * case -ENOSPC is returned then @ins->offset will contain the size of the
+ * largest available hole the allocator managed to find.
+ */
 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
@@ -8699,6 +8708,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	u64 parent;
 	u32 blocksize;
 	struct btrfs_key key;
+	struct btrfs_key first_key;
 	struct extent_buffer *next;
 	int level = wc->level;
 	int reada = 0;
@@ -8719,6 +8729,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+	btrfs_node_key_to_cpu(path->nodes[level], &first_key,
+			      path->slots[level]);
 	blocksize = fs_info->nodesize;
 
 	next = find_extent_buffer(fs_info, bytenr);
@@ -8783,7 +8795,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	if (!next) {
 		if (reada && level == 1)
 			reada_walk_down(trans, root, wc, path);
-		next = read_tree_block(fs_info, bytenr, generation);
+		next = read_tree_block(fs_info, bytenr, generation, level - 1,
+				       &first_key);
 		if (IS_ERR(next)) {
 			return PTR_ERR(next);
 		} else if (!extent_buffer_uptodate(next)) {
@@ -9648,7 +9661,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 	 */
 	target = get_restripe_target(fs_info, block_group->flags);
 	if (target) {
-		index = __get_raid_index(extended_to_chunk(target));
+		index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
 	} else {
 		/*
 		 * this is just a balance, so if we were marked as full
@@ -9662,7 +9675,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 			goto out;
 		}
 
-		index = get_block_group_index(block_group);
+		index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	}
 
 	if (index == BTRFS_RAID_RAID10) {
@@ -9911,10 +9924,40 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	return 0;
 }
 
+/* link_block_group will queue up kobjects to add when we're reclaim-safe */
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_space_info *space_info;
+	struct raid_kobject *rkobj;
+	LIST_HEAD(list);
+	int index;
+	int ret = 0;
+
+	spin_lock(&fs_info->pending_raid_kobjs_lock);
+	list_splice_init(&fs_info->pending_raid_kobjs, &list);
+	spin_unlock(&fs_info->pending_raid_kobjs_lock);
+
+	list_for_each_entry(rkobj, &list, list) {
+		space_info = __find_space_info(fs_info, rkobj->flags);
+		index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
+		if (ret) {
+			kobject_put(&rkobj->kobj);
+			break;
+		}
+	}
+	if (ret)
+		btrfs_warn(fs_info,
+			   "failed to add kobject for block cache, ignoring");
+}
+
 static void link_block_group(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
-	int index = get_block_group_index(cache);
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	int index = btrfs_bg_flags_to_raid_index(cache->flags);
 	bool first = false;
 
 	down_write(&space_info->groups_sem);
@@ -9924,27 +9967,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct raid_kobject *rkobj;
-		int ret;
-
-		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
-		if (!rkobj)
-			goto out_err;
-		rkobj->raid_type = index;
-		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
-				  "%s", get_raid_name(index));
-		if (ret) {
-			kobject_put(&rkobj->kobj);
-			goto out_err;
+		struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		if (!rkobj) {
+			btrfs_warn(cache->fs_info,
+				"couldn't alloc memory for raid level kobject");
+			return;
 		}
+		rkobj->flags = cache->flags;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+
+		spin_lock(&fs_info->pending_raid_kobjs_lock);
+		list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
+		spin_unlock(&fs_info->pending_raid_kobjs_lock);
 		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
-
-	return;
-out_err:
-	btrfs_warn(cache->fs_info,
-		   "failed to add kobject for block cache, ignoring");
 }
 
 static struct btrfs_block_group_cache *
@@ -10160,6 +10196,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 			inc_block_group_ro(cache, 1);
 	}
 
+	btrfs_add_raid_kobjects(info);
 	init_global_block_rsv(info);
 	ret = 0;
 error:
@@ -10167,9 +10204,9 @@ error:
 	return ret;
 }
 
-void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info)
+void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_block_group_item item;
@@ -10254,15 +10291,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	 * with its ->space_info set.
 	 */
 	cache->space_info = __find_space_info(fs_info, cache->flags);
-	if (!cache->space_info) {
-		ret = create_space_info(fs_info, cache->flags,
-					&cache->space_info);
-		if (ret) {
-			btrfs_remove_free_space_cache(cache);
-			btrfs_put_block_group(cache);
-			return ret;
-		}
-	}
+	ASSERT(cache->space_info);
 
 	ret = btrfs_add_block_group_cache(fs_info, cache);
 	if (ret) {
@@ -10334,7 +10363,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 					     block_group->key.offset);
 
 	memcpy(&key, &block_group->key, sizeof(key));
-	index = get_block_group_index(block_group);
+	index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
 				  BTRFS_BLOCK_GROUP_RAID1 |
 				  BTRFS_BLOCK_GROUP_RAID10))