@@ -9005,6 +9005,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
 	INIT_LIST_HEAD(&cache->bg_list);
 	INIT_LIST_HEAD(&cache->ro_list);
 	btrfs_init_free_space_ctl(cache);
+	atomic_set(&cache->trimming, 0);
 
 	return cache;
 }
@@ -9306,7 +9307,8 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 }
 
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root, u64 group_start)
+			     struct btrfs_root *root, u64 group_start,
+			     struct extent_map *em)
 {
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
@@ -9319,6 +9321,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	int index;
 	int factor;
 	struct btrfs_caching_control *caching_ctl = NULL;
+	bool remove_em;
 
 	root = root->fs_info->extent_root;
 
@@ -9464,6 +9467,61 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	memcpy(&key, &block_group->key, sizeof(key));
 
+	lock_chunks(root);
+	spin_lock(&block_group->lock);
+	block_group->removed = 1;
+	/*
+	 * At this point trimming can't start on this block group, because we
+	 * removed the block group from the tree fs_info->block_group_cache_tree,
+	 * so no one can find it anymore. And even if someone already got this
+	 * block group before we removed it from the rbtree, they have already
+	 * incremented block_group->trimming - if they didn't, they won't find
+	 * any free space entries because we already removed them all when we
+	 * called btrfs_remove_free_space_cache().
+	 *
+	 * And we must not remove the extent map from the fs_info->mapping_tree,
+	 * to prevent the same logical address range and physical device space
+	 * ranges from being reused for a new block group. This is because our
+	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+	 * completely transactionless, so while it is trimming a range the
+	 * currently running transaction might finish and a new one start,
+	 * allowing for new block groups to be created that can reuse the same
+	 * physical device locations unless we take this special care.
+	 */
+	remove_em = (atomic_read(&block_group->trimming) == 0);
+	/*
+	 * Make sure a trimmer task always sees the em in the pinned_chunks list
+	 * if it sees block_group->removed == 1 (needs to lock block_group->lock
+	 * before checking block_group->removed).
+	 */
+	if (!remove_em) {
+		/*
+		 * Our em might be in trans->transaction->pending_chunks which
+		 * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
+		 * and so is the fs_info->pinned_chunks list.
+		 *
+		 * So at this point we must be holding the chunk_mutex to avoid
+		 * any races with chunk allocation (more specifically at
+		 * volumes.c:contains_pending_extent()), to ensure it always
+		 * sees the em, either in the pending_chunks list or in the
+		 * pinned_chunks list.
+		 */
+		list_move_tail(&em->list, &root->fs_info->pinned_chunks);
+	}
+	spin_unlock(&block_group->lock);
+	unlock_chunks(root);
+
+	if (remove_em) {
+		struct extent_map_tree *em_tree;
+
+		em_tree = &root->fs_info->mapping_tree.map_tree;
+		write_lock(&em_tree->lock);
+		remove_extent_mapping(em_tree, em);
+		write_unlock(&em_tree->lock);
+		/* once for the tree */
+		free_extent_map(em);
+	}
+
 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);
 
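
For context, here is a minimal sketch (not part of the hunks above) of the trimmer-side pairing this change relies on, presumably in the trim path (e.g. btrfs_trim_block_group() in free-space-cache.c): take block_group->lock, bail out if block_group->removed is already set, otherwise hold a reference in block_group->trimming for the duration of the trim; the last trimmer to drop that count on a removed block group then performs the deferred extent map cleanup. The helper names trim_block_group_begin()/trim_block_group_end() are hypothetical; the fields, structs and kernel calls are the ones used in the diff.

/*
 * Sketch only - pairs with the block_group->removed / ->trimming
 * protocol introduced above; helper names are illustrative.
 */
static bool trim_block_group_begin(struct btrfs_block_group_cache *bg)
{
	spin_lock(&bg->lock);
	if (bg->removed) {
		/* Block group already removed, nothing left to trim. */
		spin_unlock(&bg->lock);
		return false;
	}
	atomic_inc(&bg->trimming);
	spin_unlock(&bg->lock);
	return true;
}

static void trim_block_group_end(struct btrfs_root *root,
				 struct btrfs_block_group_cache *bg)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	bool cleanup;

	spin_lock(&bg->lock);
	/* Last trimmer on a removed block group does the deferred cleanup. */
	cleanup = (atomic_dec_and_test(&bg->trimming) && bg->removed);
	spin_unlock(&bg->lock);

	if (!cleanup)
		return;

	lock_chunks(root);
	em_tree = &root->fs_info->mapping_tree.map_tree;
	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, bg->key.objectid, 1);
	BUG_ON(!em); /* the removal path kept it pinned, so it must be here */
	/* remove_extent_mapping() also unlinks the em from pinned_chunks */
	remove_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);
	unlock_chunks(root);
	/* once for the lookup above and once for the tree */
	free_extent_map(em);
	free_extent_map(em);
}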