|
@@ -37,7 +37,6 @@
|
|
|
#include <linux/quotaops.h>
|
|
|
#include <linux/string.h>
|
|
|
#include <linux/slab.h>
|
|
|
-#include <linux/falloc.h>
|
|
|
#include <asm/uaccess.h>
|
|
|
#include <linux/fiemap.h>
|
|
|
#include "ext4_jbd2.h"
|
|
@@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
|
|
|
* the extent that was written properly split out and conversion to
|
|
|
* initialized is trivial.
|
|
|
*/
|
|
|
- if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
|
|
|
+ if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2))
|
|
|
return 0;
|
|
|
|
|
|
ext1_ee_len = ext4_ext_get_actual_len(ex1);
|
|
@@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
|
|
|
*/
|
|
|
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
|
|
|
return 0;
|
|
|
+ if (ext4_ext_is_uninitialized(ex1) &&
|
|
|
+ (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
|
|
|
+ atomic_read(&EXT4_I(inode)->i_unwritten) ||
|
|
|
+ (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN)))
|
|
|
+ return 0;
|
|
|
#ifdef AGGRESSIVE_TEST
|
|
|
if (ext1_ee_len >= 4)
|
|
|
return 0;
|
|
@@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
|
|
|
{
|
|
|
struct ext4_extent_header *eh;
|
|
|
unsigned int depth, len;
|
|
|
- int merge_done = 0;
|
|
|
+ int merge_done = 0, uninit;
|
|
|
|
|
|
depth = ext_depth(inode);
|
|
|
BUG_ON(path[depth].p_hdr == NULL);
|
|
@@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
|
|
|
if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
|
|
|
break;
|
|
|
/* merge with next extent! */
|
|
|
+ uninit = ext4_ext_is_uninitialized(ex);
|
|
|
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
|
|
+ ext4_ext_get_actual_len(ex + 1));
|
|
|
+ if (uninit)
|
|
|
+ ext4_ext_mark_uninitialized(ex);
|
|
|
|
|
|
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
|
|
|
len = (EXT_LAST_EXTENT(eh) - ex - 1)
|
|
@@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
|
|
|
struct ext4_ext_path *npath = NULL;
|
|
|
int depth, len, err;
|
|
|
ext4_lblk_t next;
|
|
|
- int mb_flags = 0;
|
|
|
+ int mb_flags = 0, uninit;
|
|
|
|
|
|
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
|
|
|
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
|
|
@@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
|
|
|
path + depth);
|
|
|
if (err)
|
|
|
return err;
|
|
|
-
|
|
|
+ uninit = ext4_ext_is_uninitialized(ex);
|
|
|
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
|
|
+ ext4_ext_get_actual_len(newext));
|
|
|
+ if (uninit)
|
|
|
+ ext4_ext_mark_uninitialized(ex);
|
|
|
eh = path[depth].p_hdr;
|
|
|
nearex = ex;
|
|
|
goto merge;
|
|
@@ -1971,10 +1980,13 @@ prepend:
|
|
|
if (err)
|
|
|
return err;
|
|
|
|
|
|
+ uninit = ext4_ext_is_uninitialized(ex);
|
|
|
ex->ee_block = newext->ee_block;
|
|
|
ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
|
|
|
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
|
|
|
+ ext4_ext_get_actual_len(newext));
|
|
|
+ if (uninit)
|
|
|
+ ext4_ext_mark_uninitialized(ex);
|
|
|
eh = path[depth].p_hdr;
|
|
|
nearex = ex;
|
|
|
goto merge;
|
|
@@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
|
|
ex_ee_block = le32_to_cpu(ex->ee_block);
|
|
|
ex_ee_len = ext4_ext_get_actual_len(ex);
|
|
|
|
|
|
+ /*
|
|
|
+ * If we're starting with an extent other than the last one in the
|
|
|
+ * node, we need to see if it shares a cluster with the extent to
|
|
|
+ * the right (towards the end of the file). If its leftmost cluster
|
|
|
+ * is this extent's rightmost cluster and it is not cluster aligned,
|
|
|
+ * we'll mark it as a partial that is not to be deallocated.
|
|
|
+ */
|
|
|
+
|
|
|
+ if (ex != EXT_LAST_EXTENT(eh)) {
|
|
|
+ ext4_fsblk_t current_pblk, right_pblk;
|
|
|
+ long long current_cluster, right_cluster;
|
|
|
+
|
|
|
+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
|
|
|
+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
|
|
|
+ right_pblk = ext4_ext_pblock(ex + 1);
|
|
|
+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
|
|
|
+ if (current_cluster == right_cluster &&
|
|
|
+ EXT4_PBLK_COFF(sbi, right_pblk))
|
|
|
+ *partial_cluster = -right_cluster;
|
|
|
+ }
|
|
|
+
|
|
|
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
|
|
|
|
|
|
while (ex >= EXT_FIRST_EXTENT(eh) &&
|
|
@@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
|
|
err = ext4_ext_correct_indexes(handle, inode, path);
|
|
|
|
|
|
/*
|
|
|
- * Free the partial cluster only if the current extent does not
|
|
|
- * reference it. Otherwise we might free used cluster.
|
|
|
+ * If there's a partial cluster and at least one extent remains in
|
|
|
+ * the leaf, free the partial cluster if it isn't shared with the
|
|
|
+ * current extent. If there's a partial cluster and no extents
|
|
|
+ * remain in the leaf, it can't be freed here. It can only be
|
|
|
+ * freed when it's possible to determine if it's not shared with
|
|
|
+ * any other extent - when the next leaf is processed or when space
|
|
|
+ * removal is complete.
|
|
|
*/
|
|
|
- if (*partial_cluster > 0 &&
|
|
|
+ if (*partial_cluster > 0 && eh->eh_entries &&
|
|
|
(EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
|
|
|
*partial_cluster)) {
|
|
|
int flags = get_default_free_blocks_flags(inode);
|
|
@@ -3569,6 +3607,8 @@ out:
|
|
|
* b> Splits in two extents: Write is happening at either end of the extent
|
|
|
* c> Splits in three extents: Somone is writing in middle of the extent
|
|
|
*
|
|
|
+ * This works the same way in the case of initialized -> unwritten conversion.
|
|
|
+ *
|
|
|
* One of more index blocks maybe needed if the extent tree grow after
|
|
|
* the uninitialized extent split. To prevent ENOSPC occur at the IO
|
|
|
* complete, we need to split the uninitialized extent before DIO submit
|
|
@@ -3579,7 +3619,7 @@ out:
|
|
|
*
|
|
|
* Returns the size of uninitialized extent to be written on success.
|
|
|
*/
|
|
|
-static int ext4_split_unwritten_extents(handle_t *handle,
|
|
|
+static int ext4_split_convert_extents(handle_t *handle,
|
|
|
struct inode *inode,
|
|
|
struct ext4_map_blocks *map,
|
|
|
struct ext4_ext_path *path,
|
|
@@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
|
|
|
unsigned int ee_len;
|
|
|
int split_flag = 0, depth;
|
|
|
|
|
|
- ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
|
|
|
- "block %llu, max_blocks %u\n", inode->i_ino,
|
|
|
- (unsigned long long)map->m_lblk, map->m_len);
|
|
|
+ ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
|
|
|
+ __func__, inode->i_ino,
|
|
|
+ (unsigned long long)map->m_lblk, map->m_len);
|
|
|
|
|
|
eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
|
|
|
inode->i_sb->s_blocksize_bits;
|
|
@@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle,
|
|
|
ee_block = le32_to_cpu(ex->ee_block);
|
|
|
ee_len = ext4_ext_get_actual_len(ex);
|
|
|
|
|
|
- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
|
|
|
- split_flag |= EXT4_EXT_MARK_UNINIT2;
|
|
|
- if (flags & EXT4_GET_BLOCKS_CONVERT)
|
|
|
- split_flag |= EXT4_EXT_DATA_VALID2;
|
|
|
+ /* Convert to unwritten */
|
|
|
+ if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
|
|
|
+ split_flag |= EXT4_EXT_DATA_VALID1;
|
|
|
+ /* Convert to initialized */
|
|
|
+ } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
|
|
|
+ split_flag |= ee_block + ee_len <= eof_block ?
|
|
|
+ EXT4_EXT_MAY_ZEROOUT : 0;
|
|
|
+ split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2);
|
|
|
+ }
|
|
|
flags |= EXT4_GET_BLOCKS_PRE_IO;
|
|
|
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
|
|
|
}
|
|
|
|
|
|
+static int ext4_convert_initialized_extents(handle_t *handle,
|
|
|
+ struct inode *inode,
|
|
|
+ struct ext4_map_blocks *map,
|
|
|
+ struct ext4_ext_path *path)
|
|
|
+{
|
|
|
+ struct ext4_extent *ex;
|
|
|
+ ext4_lblk_t ee_block;
|
|
|
+ unsigned int ee_len;
|
|
|
+ int depth;
|
|
|
+ int err = 0;
|
|
|
+
|
|
|
+ depth = ext_depth(inode);
|
|
|
+ ex = path[depth].p_ext;
|
|
|
+ ee_block = le32_to_cpu(ex->ee_block);
|
|
|
+ ee_len = ext4_ext_get_actual_len(ex);
|
|
|
+
|
|
|
+ ext_debug("%s: inode %lu, logical"
|
|
|
+ "block %llu, max_blocks %u\n", __func__, inode->i_ino,
|
|
|
+ (unsigned long long)ee_block, ee_len);
|
|
|
+
|
|
|
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
|
|
|
+ err = ext4_split_convert_extents(handle, inode, map, path,
|
|
|
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
|
|
|
+ if (err < 0)
|
|
|
+ goto out;
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
|
|
|
+ if (IS_ERR(path)) {
|
|
|
+ err = PTR_ERR(path);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ depth = ext_depth(inode);
|
|
|
+ ex = path[depth].p_ext;
|
|
|
+ }
|
|
|
+
|
|
|
+ err = ext4_ext_get_access(handle, inode, path + depth);
|
|
|
+ if (err)
|
|
|
+ goto out;
|
|
|
+ /* first mark the extent as uninitialized */
|
|
|
+ ext4_ext_mark_uninitialized(ex);
|
|
|
+
|
|
|
+ /* note: ext4_ext_correct_indexes() isn't needed here because
|
|
|
+ * borders are not changed
|
|
|
+ */
|
|
|
+ ext4_ext_try_to_merge(handle, inode, path, ex);
|
|
|
+
|
|
|
+ /* Mark modified extent as dirty */
|
|
|
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
|
|
|
+out:
|
|
|
+ ext4_ext_show_leaf(inode, path);
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
|
|
|
struct inode *inode,
|
|
|
struct ext4_map_blocks *map,
|
|
@@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
|
|
|
inode->i_ino, (unsigned long long)ee_block, ee_len,
|
|
|
(unsigned long long)map->m_lblk, map->m_len);
|
|
|
#endif
|
|
|
- err = ext4_split_unwritten_extents(handle, inode, map, path,
|
|
|
- EXT4_GET_BLOCKS_CONVERT);
|
|
|
+ err = ext4_split_convert_extents(handle, inode, map, path,
|
|
|
+ EXT4_GET_BLOCKS_CONVERT);
|
|
|
if (err < 0)
|
|
|
goto out;
|
|
|
ext4_ext_drop_refs(path);
|
|
@@ -3850,6 +3949,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
|
|
|
return allocated_clusters;
|
|
|
}
|
|
|
|
|
|
+static int
|
|
|
+ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
|
|
|
+ struct ext4_map_blocks *map,
|
|
|
+ struct ext4_ext_path *path, int flags,
|
|
|
+ unsigned int allocated, ext4_fsblk_t newblock)
|
|
|
+{
|
|
|
+ int ret = 0;
|
|
|
+ int err = 0;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Make sure that the extent is no bigger than we support with
|
|
|
+ * uninitialized extent
|
|
|
+ */
|
|
|
+ if (map->m_len > EXT_UNINIT_MAX_LEN)
|
|
|
+ map->m_len = EXT_UNINIT_MAX_LEN / 2;
|
|
|
+
|
|
|
+ ret = ext4_convert_initialized_extents(handle, inode, map,
|
|
|
+ path);
|
|
|
+ if (ret >= 0) {
|
|
|
+ ext4_update_inode_fsync_trans(handle, inode, 1);
|
|
|
+ err = check_eofblocks_fl(handle, inode, map->m_lblk,
|
|
|
+ path, map->m_len);
|
|
|
+ } else
|
|
|
+ err = ret;
|
|
|
+ map->m_flags |= EXT4_MAP_UNWRITTEN;
|
|
|
+ if (allocated > map->m_len)
|
|
|
+ allocated = map->m_len;
|
|
|
+ map->m_len = allocated;
|
|
|
+
|
|
|
+ return err ? err : allocated;
|
|
|
+}
|
|
|
+
|
|
|
static int
|
|
|
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
|
|
struct ext4_map_blocks *map,
|
|
@@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
|
|
|
|
|
/* get_block() before submit the IO, split the extent */
|
|
|
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
|
|
|
- ret = ext4_split_unwritten_extents(handle, inode, map,
|
|
|
- path, flags);
|
|
|
+ ret = ext4_split_convert_extents(handle, inode, map,
|
|
|
+ path, flags | EXT4_GET_BLOCKS_CONVERT);
|
|
|
if (ret <= 0)
|
|
|
goto out;
|
|
|
/*
|
|
@@ -3993,10 +4124,6 @@ out1:
|
|
|
map->m_pblk = newblock;
|
|
|
map->m_len = allocated;
|
|
|
out2:
|
|
|
- if (path) {
|
|
|
- ext4_ext_drop_refs(path);
|
|
|
- kfree(path);
|
|
|
- }
|
|
|
return err ? err : allocated;
|
|
|
}
|
|
|
|
|
@@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|
|
struct ext4_extent newex, *ex, *ex2;
|
|
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
|
|
ext4_fsblk_t newblock = 0;
|
|
|
- int free_on_err = 0, err = 0, depth;
|
|
|
+ int free_on_err = 0, err = 0, depth, ret;
|
|
|
unsigned int allocated = 0, offset = 0;
|
|
|
unsigned int allocated_clusters = 0;
|
|
|
struct ext4_allocation_request ar;
|
|
@@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|
|
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
|
|
|
unsigned short ee_len;
|
|
|
|
|
|
+
|
|
|
/*
|
|
|
* Uninitialized extents are treated as holes, except that
|
|
|
* we split out initialized portions during a write.
|
|
@@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|
|
ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
|
|
|
ee_block, ee_len, newblock);
|
|
|
|
|
|
- if (!ext4_ext_is_uninitialized(ex))
|
|
|
+ /*
|
|
|
+ * If the extent is initialized check whether the
|
|
|
+ * caller wants to convert it to unwritten.
|
|
|
+ */
|
|
|
+ if ((!ext4_ext_is_uninitialized(ex)) &&
|
|
|
+ (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
|
|
|
+ allocated = ext4_ext_convert_initialized_extent(
|
|
|
+ handle, inode, map, path, flags,
|
|
|
+ allocated, newblock);
|
|
|
+ goto out2;
|
|
|
+ } else if (!ext4_ext_is_uninitialized(ex))
|
|
|
goto out;
|
|
|
|
|
|
- allocated = ext4_ext_handle_uninitialized_extents(
|
|
|
+ ret = ext4_ext_handle_uninitialized_extents(
|
|
|
handle, inode, map, path, flags,
|
|
|
allocated, newblock);
|
|
|
- goto out3;
|
|
|
+ if (ret < 0)
|
|
|
+ err = ret;
|
|
|
+ else
|
|
|
+ allocated = ret;
|
|
|
+ goto out2;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -4473,7 +4615,6 @@ out2:
|
|
|
kfree(path);
|
|
|
}
|
|
|
|
|
|
-out3:
|
|
|
trace_ext4_ext_map_blocks_exit(inode, flags, map,
|
|
|
err ? err : allocated);
|
|
|
ext4_es_lru_add(inode);
|
|
@@ -4514,34 +4655,200 @@ retry:
|
|
|
ext4_std_error(inode->i_sb, err);
|
|
|
}
|
|
|
|
|
|
-static void ext4_falloc_update_inode(struct inode *inode,
|
|
|
- int mode, loff_t new_size, int update_ctime)
|
|
|
+static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
|
|
|
+ ext4_lblk_t len, int flags, int mode)
|
|
|
{
|
|
|
- struct timespec now;
|
|
|
+ struct inode *inode = file_inode(file);
|
|
|
+ handle_t *handle;
|
|
|
+ int ret = 0;
|
|
|
+ int ret2 = 0;
|
|
|
+ int retries = 0;
|
|
|
+ struct ext4_map_blocks map;
|
|
|
+ unsigned int credits;
|
|
|
|
|
|
- if (update_ctime) {
|
|
|
- now = current_fs_time(inode->i_sb);
|
|
|
- if (!timespec_equal(&inode->i_ctime, &now))
|
|
|
- inode->i_ctime = now;
|
|
|
+ map.m_lblk = offset;
|
|
|
+ /*
|
|
|
+ * Don't normalize the request if it can fit in one extent so
|
|
|
+ * that it doesn't get unnecessarily split into multiple
|
|
|
+ * extents.
|
|
|
+ */
|
|
|
+ if (len <= EXT_UNINIT_MAX_LEN)
|
|
|
+ flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * credits to insert 1 extent into extent tree
|
|
|
+ */
|
|
|
+ credits = ext4_chunk_trans_blocks(inode, len);
|
|
|
+
|
|
|
+retry:
|
|
|
+ while (ret >= 0 && ret < len) {
|
|
|
+ map.m_lblk = map.m_lblk + ret;
|
|
|
+ map.m_len = len = len - ret;
|
|
|
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
|
|
|
+ credits);
|
|
|
+ if (IS_ERR(handle)) {
|
|
|
+ ret = PTR_ERR(handle);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ ret = ext4_map_blocks(handle, inode, &map, flags);
|
|
|
+ if (ret <= 0) {
|
|
|
+ ext4_debug("inode #%lu: block %u: len %u: "
|
|
|
+ "ext4_ext_map_blocks returned %d",
|
|
|
+ inode->i_ino, map.m_lblk,
|
|
|
+ map.m_len, ret);
|
|
|
+ ext4_mark_inode_dirty(handle, inode);
|
|
|
+ ret2 = ext4_journal_stop(handle);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ ret2 = ext4_journal_stop(handle);
|
|
|
+ if (ret2)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (ret == -ENOSPC &&
|
|
|
+ ext4_should_retry_alloc(inode->i_sb, &retries)) {
|
|
|
+ ret = 0;
|
|
|
+ goto retry;
|
|
|
}
|
|
|
+
|
|
|
+ return ret > 0 ? ret2 : ret;
|
|
|
+}
|
|
|
+
|
|
|
+static long ext4_zero_range(struct file *file, loff_t offset,
|
|
|
+ loff_t len, int mode)
|
|
|
+{
|
|
|
+ struct inode *inode = file_inode(file);
|
|
|
+ handle_t *handle = NULL;
|
|
|
+ unsigned int max_blocks;
|
|
|
+ loff_t new_size = 0;
|
|
|
+ int ret = 0;
|
|
|
+ int flags;
|
|
|
+ int partial;
|
|
|
+ loff_t start, end;
|
|
|
+ ext4_lblk_t lblk;
|
|
|
+ struct address_space *mapping = inode->i_mapping;
|
|
|
+ unsigned int blkbits = inode->i_blkbits;
|
|
|
+
|
|
|
+ trace_ext4_zero_range(inode, offset, len, mode);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Write out all dirty pages to avoid race conditions
|
|
|
+ * Then release them.
|
|
|
+ */
|
|
|
+ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
|
|
+ ret = filemap_write_and_wait_range(mapping, offset,
|
|
|
+ offset + len - 1);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
- * Update only when preallocation was requested beyond
|
|
|
- * the file size.
|
|
|
+	 * Round up offset. This is not fallocate, we need to zero out
|
|
|
+ * blocks, so convert interior block aligned part of the range to
|
|
|
+ * unwritten and possibly manually zero out unaligned parts of the
|
|
|
+ * range.
|
|
|
*/
|
|
|
- if (!(mode & FALLOC_FL_KEEP_SIZE)) {
|
|
|
+ start = round_up(offset, 1 << blkbits);
|
|
|
+ end = round_down((offset + len), 1 << blkbits);
|
|
|
+
|
|
|
+ if (start < offset || end > offset + len)
|
|
|
+ return -EINVAL;
|
|
|
+ partial = (offset + len) & ((1 << blkbits) - 1);
|
|
|
+
|
|
|
+ lblk = start >> blkbits;
|
|
|
+ max_blocks = (end >> blkbits);
|
|
|
+ if (max_blocks < lblk)
|
|
|
+ max_blocks = 0;
|
|
|
+ else
|
|
|
+ max_blocks -= lblk;
|
|
|
+
|
|
|
+ flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
|
|
|
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
|
|
|
+ if (mode & FALLOC_FL_KEEP_SIZE)
|
|
|
+ flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
|
|
|
+
|
|
|
+ mutex_lock(&inode->i_mutex);
|
|
|
+
|
|
|
+ /*
|
|
|
+	 * Indirect files do not support unwritten extents
|
|
|
+ */
|
|
|
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
|
|
|
+ ret = -EOPNOTSUPP;
|
|
|
+ goto out_mutex;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
|
|
|
+ offset + len > i_size_read(inode)) {
|
|
|
+ new_size = offset + len;
|
|
|
+ ret = inode_newsize_ok(inode, new_size);
|
|
|
+ if (ret)
|
|
|
+ goto out_mutex;
|
|
|
+ /*
|
|
|
+ * If we have a partial block after EOF we have to allocate
|
|
|
+ * the entire block.
|
|
|
+ */
|
|
|
+ if (partial)
|
|
|
+ max_blocks += 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (max_blocks > 0) {
|
|
|
+
|
|
|
+		/* Now release the pages and zero block aligned part of pages */
|
|
|
+ truncate_pagecache_range(inode, start, end - 1);
|
|
|
+
|
|
|
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
|
|
|
+ ext4_inode_block_unlocked_dio(inode);
|
|
|
+ inode_dio_wait(inode);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Remove entire range from the extent status tree.
|
|
|
+ */
|
|
|
+ ret = ext4_es_remove_extent(inode, lblk, max_blocks);
|
|
|
+ if (ret)
|
|
|
+ goto out_dio;
|
|
|
+
|
|
|
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
|
|
|
+ mode);
|
|
|
+ if (ret)
|
|
|
+ goto out_dio;
|
|
|
+ }
|
|
|
+
|
|
|
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
|
|
|
+ if (IS_ERR(handle)) {
|
|
|
+ ret = PTR_ERR(handle);
|
|
|
+ ext4_std_error(inode->i_sb, ret);
|
|
|
+ goto out_dio;
|
|
|
+ }
|
|
|
+
|
|
|
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
|
|
+
|
|
|
+ if (new_size) {
|
|
|
if (new_size > i_size_read(inode))
|
|
|
i_size_write(inode, new_size);
|
|
|
if (new_size > EXT4_I(inode)->i_disksize)
|
|
|
ext4_update_i_disksize(inode, new_size);
|
|
|
} else {
|
|
|
/*
|
|
|
- * Mark that we allocate beyond EOF so the subsequent truncate
|
|
|
- * can proceed even if the new size is the same as i_size.
|
|
|
- */
|
|
|
- if (new_size > i_size_read(inode))
|
|
|
+ * Mark that we allocate beyond EOF so the subsequent truncate
|
|
|
+ * can proceed even if the new size is the same as i_size.
|
|
|
+ */
|
|
|
+ if ((offset + len) > i_size_read(inode))
|
|
|
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
|
|
|
}
|
|
|
|
|
|
+ ext4_mark_inode_dirty(handle, inode);
|
|
|
+
|
|
|
+ /* Zero out partial block at the edges of the range */
|
|
|
+ ret = ext4_zero_partial_blocks(handle, inode, offset, len);
|
|
|
+
|
|
|
+ if (file->f_flags & O_SYNC)
|
|
|
+ ext4_handle_sync(handle);
|
|
|
+
|
|
|
+ ext4_journal_stop(handle);
|
|
|
+out_dio:
|
|
|
+ ext4_inode_resume_unlocked_dio(inode);
|
|
|
+out_mutex:
|
|
|
+ mutex_unlock(&inode->i_mutex);
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
|
|
{
|
|
|
struct inode *inode = file_inode(file);
|
|
|
handle_t *handle;
|
|
|
- loff_t new_size;
|
|
|
+ loff_t new_size = 0;
|
|
|
unsigned int max_blocks;
|
|
|
int ret = 0;
|
|
|
- int ret2 = 0;
|
|
|
- int retries = 0;
|
|
|
int flags;
|
|
|
- struct ext4_map_blocks map;
|
|
|
- unsigned int credits, blkbits = inode->i_blkbits;
|
|
|
+ ext4_lblk_t lblk;
|
|
|
+ struct timespec tv;
|
|
|
+ unsigned int blkbits = inode->i_blkbits;
|
|
|
|
|
|
/* Return error if mode is not supported */
|
|
|
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
|
|
|
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
|
|
|
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
if (mode & FALLOC_FL_PUNCH_HOLE)
|
|
|
return ext4_punch_hole(inode, offset, len);
|
|
|
|
|
|
+ if (mode & FALLOC_FL_COLLAPSE_RANGE)
|
|
|
+ return ext4_collapse_range(inode, offset, len);
|
|
|
+
|
|
|
ret = ext4_convert_inline_data(inode);
|
|
|
if (ret)
|
|
|
return ret;
|
|
@@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
|
|
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
+ if (mode & FALLOC_FL_ZERO_RANGE)
|
|
|
+ return ext4_zero_range(file, offset, len, mode);
|
|
|
+
|
|
|
trace_ext4_fallocate_enter(inode, offset, len, mode);
|
|
|
- map.m_lblk = offset >> blkbits;
|
|
|
+ lblk = offset >> blkbits;
|
|
|
/*
|
|
|
* We can't just convert len to max_blocks because
|
|
|
* If blocksize = 4096 offset = 3072 and len = 2048
|
|
|
*/
|
|
|
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
|
|
|
- - map.m_lblk;
|
|
|
- /*
|
|
|
- * credits to insert 1 extent into extent tree
|
|
|
- */
|
|
|
- credits = ext4_chunk_trans_blocks(inode, max_blocks);
|
|
|
- mutex_lock(&inode->i_mutex);
|
|
|
- ret = inode_newsize_ok(inode, (len + offset));
|
|
|
- if (ret) {
|
|
|
- mutex_unlock(&inode->i_mutex);
|
|
|
- trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
|
|
|
- return ret;
|
|
|
- }
|
|
|
+ - lblk;
|
|
|
+
|
|
|
flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
|
|
|
if (mode & FALLOC_FL_KEEP_SIZE)
|
|
|
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
|
|
|
- /*
|
|
|
- * Don't normalize the request if it can fit in one extent so
|
|
|
- * that it doesn't get unnecessarily split into multiple
|
|
|
- * extents.
|
|
|
- */
|
|
|
- if (len <= EXT_UNINIT_MAX_LEN << blkbits)
|
|
|
- flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
|
|
|
|
|
|
-retry:
|
|
|
- while (ret >= 0 && ret < max_blocks) {
|
|
|
- map.m_lblk = map.m_lblk + ret;
|
|
|
- map.m_len = max_blocks = max_blocks - ret;
|
|
|
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
|
|
|
- credits);
|
|
|
- if (IS_ERR(handle)) {
|
|
|
- ret = PTR_ERR(handle);
|
|
|
- break;
|
|
|
- }
|
|
|
- ret = ext4_map_blocks(handle, inode, &map, flags);
|
|
|
- if (ret <= 0) {
|
|
|
-#ifdef EXT4FS_DEBUG
|
|
|
- ext4_warning(inode->i_sb,
|
|
|
- "inode #%lu: block %u: len %u: "
|
|
|
- "ext4_ext_map_blocks returned %d",
|
|
|
- inode->i_ino, map.m_lblk,
|
|
|
- map.m_len, ret);
|
|
|
-#endif
|
|
|
- ext4_mark_inode_dirty(handle, inode);
|
|
|
- ret2 = ext4_journal_stop(handle);
|
|
|
- break;
|
|
|
- }
|
|
|
- if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
|
|
|
- blkbits) >> blkbits))
|
|
|
- new_size = offset + len;
|
|
|
- else
|
|
|
- new_size = ((loff_t) map.m_lblk + ret) << blkbits;
|
|
|
+ mutex_lock(&inode->i_mutex);
|
|
|
|
|
|
- ext4_falloc_update_inode(inode, mode, new_size,
|
|
|
- (map.m_flags & EXT4_MAP_NEW));
|
|
|
- ext4_mark_inode_dirty(handle, inode);
|
|
|
- if ((file->f_flags & O_SYNC) && ret >= max_blocks)
|
|
|
- ext4_handle_sync(handle);
|
|
|
- ret2 = ext4_journal_stop(handle);
|
|
|
- if (ret2)
|
|
|
- break;
|
|
|
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
|
|
|
+ offset + len > i_size_read(inode)) {
|
|
|
+ new_size = offset + len;
|
|
|
+ ret = inode_newsize_ok(inode, new_size);
|
|
|
+ if (ret)
|
|
|
+ goto out;
|
|
|
}
|
|
|
- if (ret == -ENOSPC &&
|
|
|
- ext4_should_retry_alloc(inode->i_sb, &retries)) {
|
|
|
- ret = 0;
|
|
|
- goto retry;
|
|
|
+
|
|
|
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
|
|
|
+ if (ret)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
|
|
+ if (IS_ERR(handle))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ tv = inode->i_ctime = ext4_current_time(inode);
|
|
|
+
|
|
|
+ if (new_size) {
|
|
|
+ if (new_size > i_size_read(inode)) {
|
|
|
+ i_size_write(inode, new_size);
|
|
|
+ inode->i_mtime = tv;
|
|
|
+ }
|
|
|
+ if (new_size > EXT4_I(inode)->i_disksize)
|
|
|
+ ext4_update_i_disksize(inode, new_size);
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * Mark that we allocate beyond EOF so the subsequent truncate
|
|
|
+ * can proceed even if the new size is the same as i_size.
|
|
|
+ */
|
|
|
+ if ((offset + len) > i_size_read(inode))
|
|
|
+ ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
|
|
|
}
|
|
|
+ ext4_mark_inode_dirty(handle, inode);
|
|
|
+ if (file->f_flags & O_SYNC)
|
|
|
+ ext4_handle_sync(handle);
|
|
|
+
|
|
|
+ ext4_journal_stop(handle);
|
|
|
+out:
|
|
|
mutex_unlock(&inode->i_mutex);
|
|
|
- trace_ext4_fallocate_exit(inode, offset, max_blocks,
|
|
|
- ret > 0 ? ret2 : ret);
|
|
|
- return ret > 0 ? ret2 : ret;
|
|
|
+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|
|
ext4_es_lru_add(inode);
|
|
|
return error;
|
|
|
}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ext4_access_path:
|
|
|
+ * Function to access the path buffer for marking it dirty.
|
|
|
+ * It also checks if there are sufficient credits left in the journal handle
|
|
|
+ * to update path.
|
|
|
+ */
|
|
|
+static int
|
|
|
+ext4_access_path(handle_t *handle, struct inode *inode,
|
|
|
+ struct ext4_ext_path *path)
|
|
|
+{
|
|
|
+ int credits, err;
|
|
|
+
|
|
|
+ if (!ext4_handle_valid(handle))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Check if need to extend journal credits
|
|
|
+ * 3 for leaf, sb, and inode plus 2 (bmap and group
|
|
|
+ * descriptor) for each block group; assume two block
|
|
|
+ * groups
|
|
|
+ */
|
|
|
+ if (handle->h_buffer_credits < 7) {
|
|
|
+ credits = ext4_writepage_trans_blocks(inode);
|
|
|
+ err = ext4_ext_truncate_extend_restart(handle, inode, credits);
|
|
|
+ /* EAGAIN is success */
|
|
|
+ if (err && err != -EAGAIN)
|
|
|
+ return err;
|
|
|
+ }
|
|
|
+
|
|
|
+ err = ext4_ext_get_access(handle, inode, path);
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ext4_ext_shift_path_extents:
|
|
|
+ * Shift the extents of a path structure lying between path[depth].p_ext
|
|
|
+ * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
|
|
|
+ * from starting block for each extent.
|
|
|
+ */
|
|
|
+static int
|
|
|
+ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
|
|
|
+ struct inode *inode, handle_t *handle,
|
|
|
+ ext4_lblk_t *start)
|
|
|
+{
|
|
|
+ int depth, err = 0;
|
|
|
+ struct ext4_extent *ex_start, *ex_last;
|
|
|
+ bool update = 0;
|
|
|
+ depth = path->p_depth;
|
|
|
+
|
|
|
+ while (depth >= 0) {
|
|
|
+ if (depth == path->p_depth) {
|
|
|
+ ex_start = path[depth].p_ext;
|
|
|
+ if (!ex_start)
|
|
|
+ return -EIO;
|
|
|
+
|
|
|
+ ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
|
|
|
+ if (!ex_last)
|
|
|
+ return -EIO;
|
|
|
+
|
|
|
+ err = ext4_access_path(handle, inode, path + depth);
|
|
|
+ if (err)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
|
|
|
+ update = 1;
|
|
|
+
|
|
|
+ *start = ex_last->ee_block +
|
|
|
+ ext4_ext_get_actual_len(ex_last);
|
|
|
+
|
|
|
+ while (ex_start <= ex_last) {
|
|
|
+ ex_start->ee_block -= shift;
|
|
|
+ if (ex_start >
|
|
|
+ EXT_FIRST_EXTENT(path[depth].p_hdr)) {
|
|
|
+ if (ext4_ext_try_to_merge_right(inode,
|
|
|
+ path, ex_start - 1))
|
|
|
+ ex_last--;
|
|
|
+ }
|
|
|
+ ex_start++;
|
|
|
+ }
|
|
|
+ err = ext4_ext_dirty(handle, inode, path + depth);
|
|
|
+ if (err)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ if (--depth < 0 || !update)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Update index too */
|
|
|
+ err = ext4_access_path(handle, inode, path + depth);
|
|
|
+ if (err)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ path[depth].p_idx->ei_block -= shift;
|
|
|
+ err = ext4_ext_dirty(handle, inode, path + depth);
|
|
|
+ if (err)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ /* we are done if current index is not a starting index */
|
|
|
+ if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
|
|
|
+ break;
|
|
|
+
|
|
|
+ depth--;
|
|
|
+ }
|
|
|
+
|
|
|
+out:
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ext4_ext_shift_extents:
|
|
|
+ * All the extents which lies in the range from start to the last allocated
|
|
|
+ * block for the file are shifted downwards by shift blocks.
|
|
|
+ * On success, 0 is returned, error otherwise.
|
|
|
+ */
|
|
|
+static int
|
|
|
+ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
|
|
|
+ ext4_lblk_t start, ext4_lblk_t shift)
|
|
|
+{
|
|
|
+ struct ext4_ext_path *path;
|
|
|
+ int ret = 0, depth;
|
|
|
+ struct ext4_extent *extent;
|
|
|
+ ext4_lblk_t stop_block, current_block;
|
|
|
+ ext4_lblk_t ex_start, ex_end;
|
|
|
+
|
|
|
+ /* Let path point to the last extent */
|
|
|
+ path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
|
|
|
+ if (IS_ERR(path))
|
|
|
+ return PTR_ERR(path);
|
|
|
+
|
|
|
+ depth = path->p_depth;
|
|
|
+ extent = path[depth].p_ext;
|
|
|
+ if (!extent) {
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ kfree(path);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ kfree(path);
|
|
|
+
|
|
|
+ /* Nothing to shift, if hole is at the end of file */
|
|
|
+ if (start >= stop_block)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Don't start shifting extents until we make sure the hole is big
|
|
|
+	 * enough to accommodate the shift.
|
|
|
+ */
|
|
|
+ path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
|
|
|
+ depth = path->p_depth;
|
|
|
+ extent = path[depth].p_ext;
|
|
|
+ ex_start = extent->ee_block;
|
|
|
+ ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ kfree(path);
|
|
|
+
|
|
|
+ if ((start == ex_start && shift > ex_start) ||
|
|
|
+ (shift > start - ex_end))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+	/* It's safe to start updating extents */
|
|
|
+ while (start < stop_block) {
|
|
|
+ path = ext4_ext_find_extent(inode, start, NULL, 0);
|
|
|
+ if (IS_ERR(path))
|
|
|
+ return PTR_ERR(path);
|
|
|
+ depth = path->p_depth;
|
|
|
+ extent = path[depth].p_ext;
|
|
|
+ current_block = extent->ee_block;
|
|
|
+ if (start > current_block) {
|
|
|
+ /* Hole, move to the next extent */
|
|
|
+ ret = mext_next_extent(inode, path, &extent);
|
|
|
+ if (ret != 0) {
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ kfree(path);
|
|
|
+ if (ret == 1)
|
|
|
+ ret = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ret = ext4_ext_shift_path_extents(path, shift, inode,
|
|
|
+ handle, &start);
|
|
|
+ ext4_ext_drop_refs(path);
|
|
|
+ kfree(path);
|
|
|
+ if (ret)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ext4_collapse_range:
|
|
|
+ * This implements the fallocate's collapse range functionality for ext4
|
|
|
+ * Returns: 0 and non-zero on error.
|
|
|
+ */
|
|
|
+int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
|
|
|
+{
|
|
|
+ struct super_block *sb = inode->i_sb;
|
|
|
+ ext4_lblk_t punch_start, punch_stop;
|
|
|
+ handle_t *handle;
|
|
|
+ unsigned int credits;
|
|
|
+ loff_t new_size;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ BUG_ON(offset + len > i_size_read(inode));
|
|
|
+
|
|
|
+ /* Collapse range works only on fs block size aligned offsets. */
|
|
|
+ if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
|
|
|
+ len & (EXT4_BLOCK_SIZE(sb) - 1))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (!S_ISREG(inode->i_mode))
|
|
|
+ return -EOPNOTSUPP;
|
|
|
+
|
|
|
+ trace_ext4_collapse_range(inode, offset, len);
|
|
|
+
|
|
|
+ punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
|
|
|
+ punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
|
|
|
+
|
|
|
+ /* Write out all dirty pages */
|
|
|
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ /* Take mutex lock */
|
|
|
+ mutex_lock(&inode->i_mutex);
|
|
|
+
|
|
|
+	/* It's not possible to punch a hole on an append-only file */
|
|
|
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
|
|
|
+ ret = -EPERM;
|
|
|
+ goto out_mutex;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (IS_SWAPFILE(inode)) {
|
|
|
+ ret = -ETXTBSY;
|
|
|
+ goto out_mutex;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Currently just for extent based files */
|
|
|
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
|
|
|
+ ret = -EOPNOTSUPP;
|
|
|
+ goto out_mutex;
|
|
|
+ }
|
|
|
+
|
|
|
+ truncate_pagecache_range(inode, offset, -1);
|
|
|
+
|
|
|
+ /* Wait for existing dio to complete */
|
|
|
+ ext4_inode_block_unlocked_dio(inode);
|
|
|
+ inode_dio_wait(inode);
|
|
|
+
|
|
|
+ credits = ext4_writepage_trans_blocks(inode);
|
|
|
+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
|
|
|
+ if (IS_ERR(handle)) {
|
|
|
+ ret = PTR_ERR(handle);
|
|
|
+ goto out_dio;
|
|
|
+ }
|
|
|
+
|
|
|
+ down_write(&EXT4_I(inode)->i_data_sem);
|
|
|
+ ext4_discard_preallocations(inode);
|
|
|
+
|
|
|
+ ret = ext4_es_remove_extent(inode, punch_start,
|
|
|
+ EXT_MAX_BLOCKS - punch_start - 1);
|
|
|
+ if (ret) {
|
|
|
+ up_write(&EXT4_I(inode)->i_data_sem);
|
|
|
+ goto out_stop;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
|
|
|
+ if (ret) {
|
|
|
+ up_write(&EXT4_I(inode)->i_data_sem);
|
|
|
+ goto out_stop;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = ext4_ext_shift_extents(inode, handle, punch_stop,
|
|
|
+ punch_stop - punch_start);
|
|
|
+ if (ret) {
|
|
|
+ up_write(&EXT4_I(inode)->i_data_sem);
|
|
|
+ goto out_stop;
|
|
|
+ }
|
|
|
+
|
|
|
+ new_size = i_size_read(inode) - len;
|
|
|
+ truncate_setsize(inode, new_size);
|
|
|
+ EXT4_I(inode)->i_disksize = new_size;
|
|
|
+
|
|
|
+ ext4_discard_preallocations(inode);
|
|
|
+ up_write(&EXT4_I(inode)->i_data_sem);
|
|
|
+ if (IS_SYNC(inode))
|
|
|
+ ext4_handle_sync(handle);
|
|
|
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
|
|
+ ext4_mark_inode_dirty(handle, inode);
|
|
|
+
|
|
|
+out_stop:
|
|
|
+ ext4_journal_stop(handle);
|
|
|
+out_dio:
|
|
|
+ ext4_inode_resume_unlocked_dio(inode);
|
|
|
+out_mutex:
|
|
|
+ mutex_unlock(&inode->i_mutex);
|
|
|
+ return ret;
|
|
|
+}
|