|
@@ -32,6 +32,12 @@
|
|
|
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
|
|
|
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
|
|
|
|
|
|
+/*
+ * Describes a free-space range that is currently being discarded (trimmed).
+ * While the trim runs, the range has been removed from the free-space rbtree,
+ * so each in-flight range is kept on btrfs_free_space_ctl::trimming_ranges
+ * (entries are stack-allocated by the trim loops) under cache_writeout_mutex;
+ * the cache writeout walks this list so the range is still recorded as free
+ * space in the on-disk cache. do_trimming() unlinks the entry when done.
+ */
+struct btrfs_trim_range {
|
|
|
+ u64 start;
|
|
|
+ u64 bytes;
|
|
|
+ struct list_head list;
|
|
|
+};
|
|
|
+
|
|
|
static int link_free_space(struct btrfs_free_space_ctl *ctl,
|
|
|
struct btrfs_free_space *info);
|
|
|
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
|
|
@@ -882,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
|
|
|
int ret;
|
|
|
struct btrfs_free_cluster *cluster = NULL;
|
|
|
struct rb_node *node = rb_first(&ctl->free_space_offset);
|
|
|
+ struct btrfs_trim_range *trim_entry;
|
|
|
|
|
|
/* Get the cluster for this block_group if it exists */
|
|
|
if (block_group && !list_empty(&block_group->cluster_list)) {
|
|
@@ -917,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
|
|
|
cluster = NULL;
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Make sure we don't miss any range that was removed from our rbtree
|
|
|
+ * because trimming is running. Otherwise after a umount+mount (or crash
|
|
|
+ * after committing the transaction) we would leak free space and get
|
|
|
+ * an inconsistent free space cache report from fsck.
|
|
|
+ */
|
|
|
+ list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
|
|
|
+ ret = io_ctl_add_entry(io_ctl, trim_entry->start,
|
|
|
+ trim_entry->bytes, NULL);
|
|
|
+ if (ret)
|
|
|
+ goto fail;
|
|
|
+ *entries += 1;
|
|
|
+ }
|
|
|
+
|
|
|
return 0;
|
|
|
fail:
|
|
|
return -ENOSPC;
|
|
@@ -1136,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
|
|
|
|
|
io_ctl_set_generation(&io_ctl, trans->transid);
|
|
|
|
|
|
+ mutex_lock(&ctl->cache_writeout_mutex);
|
|
|
/* Write out the extent entries in the free space cache */
|
|
|
ret = write_cache_extent_entries(&io_ctl, ctl,
|
|
|
block_group, &entries, &bitmaps,
|
|
|
&bitmap_list);
|
|
|
- if (ret)
|
|
|
+ if (ret) {
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
goto out_nospc;
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Some spaces that are freed in the current transaction are pinned,
|
|
@@ -1149,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
|
|
* committed, we shouldn't lose them.
|
|
|
*/
|
|
|
ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
|
|
|
- if (ret)
|
|
|
+ if (ret) {
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
goto out_nospc;
|
|
|
+ }
|
|
|
|
|
|
- /* At last, we write out all the bitmaps. */
|
|
|
+ /*
|
|
|
+ * At last, we write out all the bitmaps and keep cache_writeout_mutex
|
|
|
+ * locked while doing it because a concurrent trim can be manipulating
|
|
|
+ * or freeing the bitmap.
|
|
|
+ */
|
|
|
ret = write_bitmap_entries(&io_ctl, &bitmap_list);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
if (ret)
|
|
|
goto out_nospc;
|
|
|
|
|
@@ -2296,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
|
|
|
ctl->start = block_group->key.objectid;
|
|
|
ctl->private = block_group;
|
|
|
ctl->op = &free_space_op;
|
|
|
+ INIT_LIST_HEAD(&ctl->trimming_ranges);
|
|
|
+ mutex_init(&ctl->cache_writeout_mutex);
|
|
|
|
|
|
/*
|
|
|
* we only want to have 32k of ram per block group for keeping
|
|
@@ -2912,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
|
|
|
|
|
|
static int do_trimming(struct btrfs_block_group_cache *block_group,
|
|
|
u64 *total_trimmed, u64 start, u64 bytes,
|
|
|
- u64 reserved_start, u64 reserved_bytes)
|
|
|
+ u64 reserved_start, u64 reserved_bytes,
|
|
|
+ struct btrfs_trim_range *trim_entry)
|
|
|
{
|
|
|
struct btrfs_space_info *space_info = block_group->space_info;
|
|
|
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
|
|
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
|
|
|
int ret;
|
|
|
int update = 0;
|
|
|
u64 trimmed = 0;
|
|
@@ -2935,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
|
|
|
if (!ret)
|
|
|
*total_trimmed += trimmed;
|
|
|
|
|
|
+ mutex_lock(&ctl->cache_writeout_mutex);
|
|
|
btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
|
|
|
+ list_del(&trim_entry->list);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
|
|
|
if (update) {
|
|
|
spin_lock(&space_info->lock);
|
|
@@ -2963,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
|
|
|
u64 bytes;
|
|
|
|
|
|
while (start < end) {
|
|
|
+ struct btrfs_trim_range trim_entry;
|
|
|
+
|
|
|
+ mutex_lock(&ctl->cache_writeout_mutex);
|
|
|
spin_lock(&ctl->tree_lock);
|
|
|
|
|
|
if (ctl->free_space < minlen) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
entry = tree_search_offset(ctl, start, 0, 1);
|
|
|
if (!entry) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
break;
|
|
|
}
|
|
|
|
|
@@ -2981,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
|
|
|
node = rb_next(&entry->offset_index);
|
|
|
if (!node) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
goto out;
|
|
|
}
|
|
|
entry = rb_entry(node, struct btrfs_free_space,
|
|
@@ -2989,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
|
|
|
|
|
|
if (entry->offset >= end) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
break;
|
|
|
}
|
|
|
|
|
@@ -2998,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
|
|
|
bytes = min(extent_start + extent_bytes, end) - start;
|
|
|
if (bytes < minlen) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
goto next;
|
|
|
}
|
|
|
|
|
@@ -3005,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
|
|
|
kmem_cache_free(btrfs_free_space_cachep, entry);
|
|
|
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ trim_entry.start = extent_start;
|
|
|
+ trim_entry.bytes = extent_bytes;
|
|
|
+ list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
|
|
|
ret = do_trimming(block_group, total_trimmed, start, bytes,
|
|
|
- extent_start, extent_bytes);
|
|
|
+ extent_start, extent_bytes, &trim_entry);
|
|
|
if (ret)
|
|
|
break;
|
|
|
next:
|
|
@@ -3036,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
|
|
|
|
|
|
while (offset < end) {
|
|
|
bool next_bitmap = false;
|
|
|
+ struct btrfs_trim_range trim_entry;
|
|
|
|
|
|
+ mutex_lock(&ctl->cache_writeout_mutex);
|
|
|
spin_lock(&ctl->tree_lock);
|
|
|
|
|
|
if (ctl->free_space < minlen) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
entry = tree_search_offset(ctl, offset, 1, 0);
|
|
|
if (!entry) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
next_bitmap = true;
|
|
|
goto next;
|
|
|
}
|
|
@@ -3055,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
|
|
|
ret2 = search_bitmap(ctl, entry, &start, &bytes);
|
|
|
if (ret2 || start >= end) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
next_bitmap = true;
|
|
|
goto next;
|
|
|
}
|
|
@@ -3062,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
|
|
|
bytes = min(bytes, end - start);
|
|
|
if (bytes < minlen) {
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
goto next;
|
|
|
}
|
|
|
|
|
@@ -3070,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
|
|
|
free_bitmap(ctl, entry);
|
|
|
|
|
|
spin_unlock(&ctl->tree_lock);
|
|
|
+ trim_entry.start = start;
|
|
|
+ trim_entry.bytes = bytes;
|
|
|
+ list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
|
|
|
+ mutex_unlock(&ctl->cache_writeout_mutex);
|
|
|
|
|
|
ret = do_trimming(block_group, total_trimmed, start, bytes,
|
|
|
- start, bytes);
|
|
|
+ start, bytes, &trim_entry);
|
|
|
if (ret)
|
|
|
break;
|
|
|
next:
|