|
@@ -1882,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static int btrfs_issue_discard(struct block_device *bdev,
|
|
|
- u64 start, u64 len)
|
|
|
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
|
|
|
+static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
|
|
|
+ u64 *discarded_bytes)
|
|
|
{
|
|
|
- return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
|
|
|
+ int j, ret = 0;
|
|
|
+ u64 bytes_left, end;
|
|
|
+ u64 aligned_start = ALIGN(start, 1 << 9);
|
|
|
+
|
|
|
+ if (WARN_ON(start != aligned_start)) {
|
|
|
+ len -= aligned_start - start;
|
|
|
+ len = round_down(len, 1 << 9);
|
|
|
+ start = aligned_start;
|
|
|
+ }
|
|
|
+
|
|
|
+ *discarded_bytes = 0;
|
|
|
+
|
|
|
+ if (!len)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ end = start + len;
|
|
|
+ bytes_left = len;
|
|
|
+
|
|
|
+ /* Skip any superblocks on this device. */
|
|
|
+ for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
|
|
|
+ u64 sb_start = btrfs_sb_offset(j);
|
|
|
+ u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
|
|
|
+ u64 size = sb_start - start;
|
|
|
+
|
|
|
+ if (!in_range(sb_start, start, bytes_left) &&
|
|
|
+ !in_range(sb_end, start, bytes_left) &&
|
|
|
+ !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
|
|
|
+ continue;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Superblock spans beginning of range. Adjust start and
|
|
|
+ * try again.
|
|
|
+ */
|
|
|
+ if (sb_start <= start) {
|
|
|
+ start += sb_end - start;
|
|
|
+ if (start > end) {
|
|
|
+ bytes_left = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ bytes_left = end - start;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (size) {
|
|
|
+ ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
|
|
|
+ GFP_NOFS, 0);
|
|
|
+ if (!ret)
|
|
|
+ *discarded_bytes += size;
|
|
|
+ else if (ret != -EOPNOTSUPP)
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ start = sb_end;
|
|
|
+ if (start > end) {
|
|
|
+ bytes_left = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ bytes_left = end - start;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (bytes_left) {
|
|
|
+ ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
|
|
|
+ GFP_NOFS, 0);
|
|
|
+ if (!ret)
|
|
|
+ *discarded_bytes += bytes_left;
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
|
@@ -1906,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
|
|
|
|
|
|
|
|
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
|
|
|
+ u64 bytes;
|
|
|
if (!stripe->dev->can_discard)
|
|
|
continue;
|
|
|
|
|
|
ret = btrfs_issue_discard(stripe->dev->bdev,
|
|
|
stripe->physical,
|
|
|
- stripe->length);
|
|
|
+ stripe->length,
|
|
|
+ &bytes);
|
|
|
if (!ret)
|
|
|
- discarded_bytes += stripe->length;
|
|
|
+ discarded_bytes += bytes;
|
|
|
else if (ret != -EOPNOTSUPP)
|
|
|
break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
|
|
|
|
|
@@ -6061,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_root *root)
|
|
|
{
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
+ struct btrfs_block_group_cache *block_group, *tmp;
|
|
|
+ struct list_head *deleted_bgs;
|
|
|
struct extent_io_tree *unpin;
|
|
|
u64 start;
|
|
|
u64 end;
|
|
|
int ret;
|
|
|
|
|
|
- if (trans->aborted)
|
|
|
- return 0;
|
|
|
-
|
|
|
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
|
|
|
unpin = &fs_info->freed_extents[1];
|
|
|
else
|
|
|
unpin = &fs_info->freed_extents[0];
|
|
|
|
|
|
- while (1) {
|
|
|
+ while (!trans->aborted) {
|
|
|
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
|
|
ret = find_first_extent_bit(unpin, 0, &start, &end,
|
|
|
EXTENT_DIRTY, NULL);
|
|
@@ -6093,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
|
|
|
cond_resched();
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * Transaction is finished. We don't need the lock anymore. We
|
|
|
+ * do need to clean up the block groups in case of a transaction
|
|
|
+ * abort.
|
|
|
+ */
|
|
|
+ deleted_bgs = &trans->transaction->deleted_bgs;
|
|
|
+ list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
|
|
|
+ u64 trimmed = 0;
|
|
|
+
|
|
|
+ ret = -EROFS;
|
|
|
+ if (!trans->aborted)
|
|
|
+ ret = btrfs_discard_extent(root,
|
|
|
+ block_group->key.objectid,
|
|
|
+ block_group->key.offset,
|
|
|
+ &trimmed);
|
|
|
+
|
|
|
+ list_del_init(&block_group->bg_list);
|
|
|
+ btrfs_put_block_group_trimming(block_group);
|
|
|
+ btrfs_put_block_group(block_group);
|
|
|
+
|
|
|
+ if (ret) {
|
|
|
+ const char *errstr = btrfs_decode_error(ret);
|
|
|
+ btrfs_warn(fs_info,
|
|
|
+ "Discard failed while removing blockgroup: errno=%d %s\n",
|
|
|
+ ret, errstr);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -9830,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
|
|
* currently running transaction might finish and a new one start,
|
|
|
* allowing for new block groups to be created that can reuse the same
|
|
|
* physical device locations unless we take this special care.
|
|
|
+ *
|
|
|
+ * There may also be an implicit trim operation if the file system
|
|
|
+ * is mounted with -odiscard. The same protections must remain
|
|
|
+ * in place until the extents have been discarded completely when
|
|
|
+ * the transaction commit has completed.
|
|
|
*/
|
|
|
remove_em = (atomic_read(&block_group->trimming) == 0);
|
|
|
/*
|
|
@@ -9904,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
|
|
spin_lock(&fs_info->unused_bgs_lock);
|
|
|
while (!list_empty(&fs_info->unused_bgs)) {
|
|
|
u64 start, end;
|
|
|
+ int trimming;
|
|
|
|
|
|
block_group = list_first_entry(&fs_info->unused_bgs,
|
|
|
struct btrfs_block_group_cache,
|
|
@@ -10003,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
|
|
spin_unlock(&block_group->lock);
|
|
|
spin_unlock(&space_info->lock);
|
|
|
|
|
|
+ /* DISCARD can flip during remount */
|
|
|
+ trimming = btrfs_test_opt(root, DISCARD);
|
|
|
+
|
|
|
+ /* Implicit trim during transaction commit. */
|
|
|
+ if (trimming)
|
|
|
+ btrfs_get_block_group_trimming(block_group);
|
|
|
+
|
|
|
/*
|
|
|
* Btrfs_remove_chunk will abort the transaction if things go
|
|
|
* horribly wrong.
|
|
|
*/
|
|
|
ret = btrfs_remove_chunk(trans, root,
|
|
|
block_group->key.objectid);
|
|
|
+
|
|
|
+ if (ret) {
|
|
|
+ if (trimming)
|
|
|
+ btrfs_put_block_group_trimming(block_group);
|
|
|
+ goto end_trans;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If we're not mounted with -odiscard, we can just forget
|
|
|
+ * about this block group. Otherwise we'll need to wait
|
|
|
+ * until transaction commit to do the actual discard.
|
|
|
+ */
|
|
|
+ if (trimming) {
|
|
|
+ WARN_ON(!list_empty(&block_group->bg_list));
|
|
|
+ spin_lock(&trans->transaction->deleted_bgs_lock);
|
|
|
+ list_move(&block_group->bg_list,
|
|
|
+ &trans->transaction->deleted_bgs);
|
|
|
+ spin_unlock(&trans->transaction->deleted_bgs_lock);
|
|
|
+ btrfs_get_block_group(block_group);
|
|
|
+ }
|
|
|
end_trans:
|
|
|
btrfs_end_transaction(trans, root);
|
|
|
next:
|
|
@@ -10062,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
|
|
|
return unpin_extent_range(root, start, end, false);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * It used to be that old block groups would be left around forever.
|
|
|
+ * Iterating over them would be enough to trim unused space. Since we
|
|
|
+ * now automatically remove them, we also need to iterate over unallocated
|
|
|
+ * space.
|
|
|
+ *
|
|
|
+ * We don't want a transaction for this since the discard may take a
|
|
|
+ * substantial amount of time. We don't require that a transaction be
|
|
|
+ * running, but we do need to take a running transaction into account
|
|
|
+ * to ensure that we're not discarding chunks that were released in
|
|
|
+ * the current transaction.
|
|
|
+ *
|
|
|
+ * Holding the chunks lock will prevent other threads from allocating
|
|
|
+ * or releasing chunks, but it won't prevent a running transaction
|
|
|
+ * from committing and releasing the memory that the pending chunks
|
|
|
+ * list head uses. For that, we need to take a reference to the
|
|
|
+ * transaction.
|
|
|
+ */
|
|
|
+static int btrfs_trim_free_extents(struct btrfs_device *device,
|
|
|
+ u64 minlen, u64 *trimmed)
|
|
|
+{
|
|
|
+ u64 start = 0, len = 0;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ *trimmed = 0;
|
|
|
+
|
|
|
+ /* Not writeable = nothing to do. */
|
|
|
+ if (!device->writeable)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ /* No free space = nothing to do. */
|
|
|
+ if (device->total_bytes <= device->bytes_used)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ ret = 0;
|
|
|
+
|
|
|
+ while (1) {
|
|
|
+ struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
|
|
|
+ struct btrfs_transaction *trans;
|
|
|
+ u64 bytes;
|
|
|
+
|
|
|
+ ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ down_read(&fs_info->commit_root_sem);
|
|
|
+
|
|
|
+ spin_lock(&fs_info->trans_lock);
|
|
|
+ trans = fs_info->running_transaction;
|
|
|
+ if (trans)
|
|
|
+ atomic_inc(&trans->use_count);
|
|
|
+ spin_unlock(&fs_info->trans_lock);
|
|
|
+
|
|
|
+ ret = find_free_dev_extent_start(trans, device, minlen, start,
|
|
|
+ &start, &len);
|
|
|
+ if (trans)
|
|
|
+ btrfs_put_transaction(trans);
|
|
|
+
|
|
|
+ if (ret) {
|
|
|
+ up_read(&fs_info->commit_root_sem);
|
|
|
+ mutex_unlock(&fs_info->chunk_mutex);
|
|
|
+ if (ret == -ENOSPC)
|
|
|
+ ret = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
|
|
|
+ up_read(&fs_info->commit_root_sem);
|
|
|
+ mutex_unlock(&fs_info->chunk_mutex);
|
|
|
+
|
|
|
+ if (ret)
|
|
|
+ break;
|
|
|
+
|
|
|
+ start += len;
|
|
|
+ *trimmed += bytes;
|
|
|
+
|
|
|
+ if (fatal_signal_pending(current)) {
|
|
|
+ ret = -ERESTARTSYS;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ cond_resched();
|
|
|
+ }
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
|
|
|
{
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
|
struct btrfs_block_group_cache *cache = NULL;
|
|
|
+ struct btrfs_device *device;
|
|
|
+ struct list_head *devices;
|
|
|
u64 group_trimmed;
|
|
|
u64 start;
|
|
|
u64 end;
|
|
@@ -10120,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
|
|
|
cache = next_block_group(fs_info->tree_root, cache);
|
|
|
}
|
|
|
|
|
|
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
|
|
|
+ devices = &root->fs_info->fs_devices->alloc_list;
|
|
|
+ list_for_each_entry(device, devices, dev_alloc_list) {
|
|
|
+ ret = btrfs_trim_free_extents(device, range->minlen,
|
|
|
+ &group_trimmed);
|
|
|
+ if (ret)
|
|
|
+ break;
|
|
|
+
|
|
|
+ trimmed += group_trimmed;
|
|
|
+ }
|
|
|
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
|
|
|
+
|
|
|
range->len = trimmed;
|
|
|
return ret;
|
|
|
}
|