@@ -2458,6 +2458,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
 	return ret;
 }
 
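+/*
+ * Helper for hole punching and zero range: truncate the page cache and lock
+ * the range in the inode's io_tree, retrying until no ordered extents are
+ * left in the range and no page in the range was read in by a racing task.
+ */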
+static int btrfs_punch_hole_lock_range(struct inode *inode,
+				       const u64 lockstart,
+				       const u64 lockend,
+				       struct extent_state **cached_state)
+{
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+		int ret;
+
+		truncate_pagecache_range(inode, lockstart, lockend);
+
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 cached_state);
+		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+		/*
+		 * We need to make sure we have no ordered extents in this range
+		 * and nobody raced in and read a page in this range, if we did
+		 * we need to try again.
+		 */
+		if ((!ordered ||
+		    (ordered->file_offset + ordered->len <= lockstart ||
+		     ordered->file_offset > lockend)) &&
+		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, cached_state);
+		ret = btrfs_wait_ordered_range(inode, lockstart,
+					       lockend - lockstart + 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2574,38 +2614,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			goto out_only_mutex;
 	}
 
-	while (1) {
-		struct btrfs_ordered_extent *ordered;
-
-		truncate_pagecache_range(inode, lockstart, lockend);
-
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 &cached_state);
-		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
-
-		/*
-		 * We need to make sure we have no ordered extents in this range
-		 * and nobody raced in and read a page in this range, if we did
-		 * we need to try again.
-		 */
-		if ((!ordered ||
-		    (ordered->file_offset + ordered->len <= lockstart ||
-		     ordered->file_offset > lockend)) &&
-		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
-			if (ordered)
-				btrfs_put_ordered_extent(ordered);
-			break;
-		}
-		if (ordered)
-			btrfs_put_ordered_extent(ordered);
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
-				     lockend, &cached_state);
-		ret = btrfs_wait_ordered_range(inode, lockstart,
-					       lockend - lockstart + 1);
-		if (ret) {
-			inode_unlock(inode);
-			return ret;
-		}
-	}
+	ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
+					  &cached_state);
+	if (ret)
+		goto out_only_mutex;
 
 	path = btrfs_alloc_path();
@@ -2814,6 +2827,217 @@ insert:
 	return 0;
 }
 
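+/*
+ * Update i_size and the inode item after an fallocate operation that grew
+ * the file, unless FALLOC_FL_KEEP_SIZE was given or the new end does not
+ * exceed the current i_size.
+ */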
+static int btrfs_fallocate_update_isize(struct inode *inode,
+					const u64 end,
+					const int mode)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+	int ret2;
+
+	if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
+		return 0;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	inode->i_ctime = current_time(inode);
+	i_size_write(inode, end);
+	btrfs_ordered_update_i_size(inode, end, NULL);
+	ret = btrfs_update_inode(trans, root, inode);
+	ret2 = btrfs_end_transaction(trans);
+
+	return ret ? ret : ret2;
+}
+
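+/*
+ * Check the block that contains @offset. Returns 1 if it is a hole, 0 if it
+ * maps to an extent, or a negative error on failure to look up the extent
+ * map.
+ */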
+static int btrfs_zero_range_check_range_boundary(struct inode *inode,
+						 u64 offset)
+{
+	const u64 sectorsize = btrfs_inode_sectorsize(inode);
+	struct extent_map *em;
+	int ret = 0;
+
+	offset = round_down(offset, sectorsize);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
+
+	if (em->block_start == EXTENT_MAP_HOLE)
+		ret = 1;
+
+	free_extent_map(em);
+	return ret;
+}
+
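+/*
+ * Zero the range [offset, offset + len) by making sure it is backed by a
+ * prealloc (unwritten) extent, zeroing any partial blocks at the edges in
+ * place and updating i_size when needed. Called with the inode locked.
+ */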
+static int btrfs_zero_range(struct inode *inode,
+			    loff_t offset,
+			    loff_t len,
+			    const int mode)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct extent_map *em;
+	struct extent_changeset *data_reserved = NULL;
+	int ret;
+	u64 alloc_hint = 0;
+	const u64 sectorsize = btrfs_inode_sectorsize(inode);
+	u64 alloc_start = round_down(offset, sectorsize);
+	u64 alloc_end = round_up(offset + len, sectorsize);
+	u64 bytes_to_reserve = 0;
+	bool space_reserved = false;
+
+	inode_dio_wait(inode);
+
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
+			      alloc_start, alloc_end - alloc_start, 0);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out;
+	}
+
+	/*
+	 * Avoid hole punching and extent allocation for some cases. More cases
+	 * could be considered, but these are unlikely common and we keep things
+	 * as simple as possible for now. Also, intentionally, if the target
+	 * range contains one or more prealloc extents together with regular
+	 * extents and holes, we drop all the existing extents and allocate a
+	 * new prealloc extent, so that we get a larger contiguous disk extent.
+	 */
+	if (em->start <= alloc_start &&
+	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+		const u64 em_end = em->start + em->len;
+
+		if (em_end >= offset + len) {
+			/*
+			 * The whole range is already a prealloc extent,
+			 * do nothing except updating the inode's i_size if
+			 * needed.
+			 */
+			free_extent_map(em);
+			ret = btrfs_fallocate_update_isize(inode, offset + len,
+							   mode);
+			goto out;
+		}
+		/*
+		 * Part of the range is already a prealloc extent, so operate
+		 * only on the remaining part of the range.
+		 */
+		alloc_start = em_end;
+		ASSERT(IS_ALIGNED(alloc_start, sectorsize));
+		len = offset + len - alloc_start;
+		offset = alloc_start;
+		alloc_hint = em->block_start + em->len;
+	}
+	free_extent_map(em);
+
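+	/* The remaining range to zero fits within a single block. */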
+	if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
+	    BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
+				      alloc_start, sectorsize, 0);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out;
+		}
+
+		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+			free_extent_map(em);
+			ret = btrfs_fallocate_update_isize(inode, offset + len,
+							   mode);
+			goto out;
+		}
+		if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
+			free_extent_map(em);
+			ret = btrfs_truncate_block(inode, offset, len, 0);
+			if (!ret)
+				ret = btrfs_fallocate_update_isize(inode,
+								   offset + len,
+								   mode);
+			return ret;
+		}
+		free_extent_map(em);
+		alloc_start = round_down(offset, sectorsize);
+		alloc_end = alloc_start + sectorsize;
+		goto reserve_space;
+	}
+
+	alloc_start = round_up(offset, sectorsize);
+	alloc_end = round_down(offset + len, sectorsize);
+
+	/*
+	 * For unaligned ranges, check the pages at the boundaries, they might
+	 * map to an extent, in which case we need to partially zero them, or
+	 * they might map to a hole, in which case we need our allocation range
+	 * to cover them.
+	 */
+	if (!IS_ALIGNED(offset, sectorsize)) {
+		ret = btrfs_zero_range_check_range_boundary(inode, offset);
+		if (ret < 0)
+			goto out;
+		if (ret) {
+			alloc_start = round_down(offset, sectorsize);
+			ret = 0;
+		} else {
+			ret = btrfs_truncate_block(inode, offset, 0, 0);
+			if (ret)
+				goto out;
+		}
+	}
+
+	if (!IS_ALIGNED(offset + len, sectorsize)) {
+		ret = btrfs_zero_range_check_range_boundary(inode,
+							    offset + len);
+		if (ret < 0)
+			goto out;
+		if (ret) {
+			alloc_end = round_up(offset + len, sectorsize);
+			ret = 0;
+		} else {
+			ret = btrfs_truncate_block(inode, offset + len, 0, 1);
+			if (ret)
+				goto out;
+		}
+	}
+
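+	/*
+	 * Reserve data and qgroup space for the block-aligned part of the
+	 * range, lock it and allocate it as a prealloc (unwritten) extent.
+	 */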
+reserve_space:
+	if (alloc_start < alloc_end) {
+		struct extent_state *cached_state = NULL;
+		const u64 lockstart = alloc_start;
+		const u64 lockend = alloc_end - 1;
+
+		bytes_to_reserve = alloc_end - alloc_start;
+		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
+						      bytes_to_reserve);
+		if (ret < 0)
+			goto out;
+		space_reserved = true;
+		ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
+						alloc_start, bytes_to_reserve);
+		if (ret)
+			goto out;
+		ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
+						  &cached_state);
+		if (ret)
+			goto out;
+		ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
+						alloc_end - alloc_start,
+						i_blocksize(inode),
+						offset + len, &alloc_hint);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, &cached_state);
+		/* btrfs_prealloc_file_range releases reserved space on error */
+		if (ret)
+			space_reserved = false;
+	}
+ out:
+	if (ret && space_reserved)
+		btrfs_free_reserved_data_space(inode, data_reserved,
+					       alloc_start, bytes_to_reserve);
+	extent_changeset_free(data_reserved);
+
+	return ret;
+}
+
 static long btrfs_fallocate(struct file *file, int mode,
 			    loff_t offset, loff_t len)
 {
@@ -2839,7 +3063,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 	cur_offset = alloc_start;
 
 	/* Make sure we aren't being give some crap mode */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+		     FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -2850,10 +3075,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 	 *
 	 * For qgroup space, it will be checked later.
 	 */
-	ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
-					      alloc_end - alloc_start);
-	if (ret < 0)
-		return ret;
+	if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
+						      alloc_end - alloc_start);
+		if (ret < 0)
+			return ret;
+	}
 
 	inode_lock(inode);
 
@@ -2895,6 +3122,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 	if (ret)
 		goto out;
 
+	if (mode & FALLOC_FL_ZERO_RANGE) {
+		ret = btrfs_zero_range(inode, offset, len, mode);
+		inode_unlock(inode);
+		return ret;
+	}
+
 	locked_end = alloc_end - 1;
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
@@ -2988,37 +3221,18 @@ static long btrfs_fallocate(struct file *file, int mode,
 	if (ret < 0)
 		goto out_unlock;
 
-	if (actual_end > inode->i_size &&
-	    !(mode & FALLOC_FL_KEEP_SIZE)) {
-		struct btrfs_trans_handle *trans;
-		struct btrfs_root *root = BTRFS_I(inode)->root;
-
-		/*
-		 * We didn't need to allocate any more space, but we
-		 * still extended the size of the file so we need to
-		 * update i_size and the inode item.
-		 */
-		trans = btrfs_start_transaction(root, 1);
-		if (IS_ERR(trans)) {
-			ret = PTR_ERR(trans);
-		} else {
-			inode->i_ctime = current_time(inode);
-			i_size_write(inode, actual_end);
-			btrfs_ordered_update_i_size(inode, actual_end, NULL);
-			ret = btrfs_update_inode(trans, root, inode);
-			if (ret)
-				btrfs_end_transaction(trans);
-			else
-				ret = btrfs_end_transaction(trans);
-		}
-	}
+	/*
+	 * We didn't need to allocate any more space, but we still extended the
+	 * size of the file so we need to update i_size and the inode item.
+	 */
+	ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
 out_unlock:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
 			     &cached_state);
 out:
 	inode_unlock(inode);
 	/* Let go of our reservation. */
-	if (ret != 0)
+	if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
 		btrfs_free_reserved_data_space(inode, data_reserved,
 					       alloc_start, alloc_end - cur_offset);
 	extent_changeset_free(data_reserved);
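
For reference, the new mode is driven from userspace via fallocate(2). A
minimal sketch (the path and sizes below are illustrative, not part of the
patch):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <unistd.h>

	int main(void)
	{
		/* Assumes /mnt/btrfs/foo exists on a btrfs mount. */
		int fd = open("/mnt/btrfs/foo", O_RDWR);

		if (fd < 0)
			return 1;
		/* Zero 1MiB at offset 4096 without changing i_size. */
		if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
			      4096, 1024 * 1024) < 0)
			return 1;
		close(fd);
		return 0;
	}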