@@ -9005,6 +9005,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
 	INIT_LIST_HEAD(&cache->bg_list);
 	INIT_LIST_HEAD(&cache->ro_list);
 	btrfs_init_free_space_ctl(cache);
+	atomic_set(&cache->trimming, 0);
 
 	return cache;
 }
@@ -9306,7 +9307,8 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 }
 
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, u64 group_start)
+			     struct btrfs_root *root, u64 group_start,
+			     struct extent_map *em)
 {
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
@@ -9319,6 +9321,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	int index;
 	int factor;
 	struct btrfs_caching_control *caching_ctl = NULL;
+	bool remove_em;
 
 	root = root->fs_info->extent_root;
 
@@ -9464,6 +9467,61 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	memcpy(&key, &block_group->key, sizeof(key));
 
+	lock_chunks(root);
+	spin_lock(&block_group->lock);
+	block_group->removed = 1;
+	/*
+	 * At this point trimming can't start on this block group, because we
+	 * removed the block group from the tree fs_info->block_group_cache_tree,
+	 * so no one can find it anymore. And even if someone already got this
+	 * block group before we removed it from the rbtree, they have already
+	 * incremented block_group->trimming - if they didn't, they won't find
+	 * any free space entries because we already removed them all when we
+	 * called btrfs_remove_free_space_cache().
+	 *
+	 * And we must not remove the extent map from the fs_info->mapping_tree,
+	 * to prevent the same logical address range and physical device space
+	 * ranges from being reused for a new block group. This is because our
+	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+	 * completely transactionless, so while it is trimming a range the
+	 * currently running transaction might finish and a new one start,
+	 * allowing for new block groups to be created that can reuse the same
+	 * physical device locations unless we take this special care.
+	 */
+	remove_em = (atomic_read(&block_group->trimming) == 0);
+	/*
+	 * Make sure a trimmer task always sees the em in the pinned_chunks list
+	 * if it sees block_group->removed == 1 (needs to lock block_group->lock
+	 * before checking block_group->removed).
+	 */
+	if (!remove_em) {
+		/*
+		 * Our em might be in trans->transaction->pending_chunks which
+		 * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
+		 * and so is the fs_info->pinned_chunks list.
+		 *
+		 * So at this point we must be holding the chunk_mutex to avoid
+		 * any races with chunk allocation (more specifically at
+		 * volumes.c:contains_pending_extent()), to ensure it always
+		 * sees the em, either in the pending_chunks list or in the
+		 * pinned_chunks list.
+		 */
+		list_move_tail(&em->list, &root->fs_info->pinned_chunks);
+	}
+	spin_unlock(&block_group->lock);
+	unlock_chunks(root);
+
+	if (remove_em) {
+		struct extent_map_tree *em_tree;
+
+		em_tree = &root->fs_info->mapping_tree.map_tree;
+		write_lock(&em_tree->lock);
+		remove_extent_mapping(em_tree, em);
+		write_unlock(&em_tree->lock);
+		/* once for the tree */
+		free_extent_map(em);
+	}
+
 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);
 
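
For context, here is a minimal sketch (not part of the hunks above) of the trimmer-side pairing this change relies on, presumably in the trim path (e.g. btrfs_trim_block_group() in free-space-cache.c): take block_group->lock, bail out if block_group->removed is already set, otherwise hold a reference in block_group->trimming for the duration of the trim; the last trimmer to drop that count on a removed block group then performs the deferred extent map cleanup. The helper names trim_block_group_begin()/trim_block_group_end() are hypothetical; the fields, structs and kernel calls are the ones used in the diff.

/*
 * Sketch only - pairs with the block_group->removed / ->trimming
 * protocol introduced above; helper names are illustrative.
 */
static bool trim_block_group_begin(struct btrfs_block_group_cache *bg)
{
	spin_lock(&bg->lock);
	if (bg->removed) {
		/* Block group already removed, nothing left to trim. */
		spin_unlock(&bg->lock);
		return false;
	}
	atomic_inc(&bg->trimming);
	spin_unlock(&bg->lock);
	return true;
}

static void trim_block_group_end(struct btrfs_root *root,
				 struct btrfs_block_group_cache *bg)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	bool cleanup;

	spin_lock(&bg->lock);
	/* Last trimmer on a removed block group does the deferred cleanup. */
	cleanup = (atomic_dec_and_test(&bg->trimming) && bg->removed);
	spin_unlock(&bg->lock);

	if (!cleanup)
		return;

	lock_chunks(root);
	em_tree = &root->fs_info->mapping_tree.map_tree;
	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, bg->key.objectid, 1);
	BUG_ON(!em); /* the removal path kept it pinned, so it must be here */
	/* remove_extent_mapping() also unlinks the em from pinned_chunks */
	remove_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);
	unlock_chunks(root);
	/* once for the lookup above and once for the tree */
	free_extent_map(em);
	free_extent_map(em);
}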