@@ -2351,8 +2351,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
 {
 	struct extent_status es;
 
-	ext4_es_find_delayed_extent_range(inode, hole_start,
-					  hole_start + hole_len - 1, &es);
+	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+				  hole_start + hole_len - 1, &es);
 	if (es.es_len) {
 		/* There's delayed extent containing lblock? */
 		if (es.es_lblk <= hole_start)
@@ -2490,106 +2490,157 @@ static inline int get_default_free_blocks_flags(struct inode *inode)
 	return 0;
 }
 
+/*
+ * ext4_rereserve_cluster - increment the reserved cluster count when
+ *                          freeing a cluster with a pending reservation
+ *
+ * @inode - file containing the cluster
+ * @lblk - logical block in cluster to be reserved
+ *
+ * Increments the reserved cluster count and adjusts quota in a bigalloc
+ * file system when freeing a partial cluster containing at least one
+ * delayed and unwritten block. A partial cluster meeting that
+ * requirement will have a pending reservation. If so, the
+ * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
+ * defer reserved and allocated space accounting to a subsequent call
+ * to this function.
+ */
+static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
+
+	spin_lock(&ei->i_block_reservation_lock);
+	ei->i_reserved_data_blocks++;
+	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
+	spin_unlock(&ei->i_block_reservation_lock);
+
+	percpu_counter_add(&sbi->s_freeclusters_counter, 1);
+	ext4_remove_pending(inode, lblk);
+}
+
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      struct ext4_extent *ex,
-			      long long *partial_cluster,
+			      struct partial_cluster *partial,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
-	ext4_fsblk_t pblk;
-	int flags = get_default_free_blocks_flags(inode);
+	ext4_fsblk_t last_pblk, pblk;
+	ext4_lblk_t num;
+	int flags;
+
+	/* only extent tail removal is allowed */
+	if (from < le32_to_cpu(ex->ee_block) ||
+	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
+		ext4_error(sbi->s_sb,
+			   "strange request: removal(2) %u-%u from %u:%u",
+			   from, to, le32_to_cpu(ex->ee_block), ee_len);
+		return 0;
+	}
+
+#ifdef EXTENTS_STATS
+	spin_lock(&sbi->s_ext_stats_lock);
+	sbi->s_ext_blocks += ee_len;
+	sbi->s_ext_extents++;
+	if (ee_len < sbi->s_ext_min)
+		sbi->s_ext_min = ee_len;
+	if (ee_len > sbi->s_ext_max)
+		sbi->s_ext_max = ee_len;
+	if (ext_depth(inode) > sbi->s_depth_max)
+		sbi->s_depth_max = ext_depth(inode);
+	spin_unlock(&sbi->s_ext_stats_lock);
+#endif
+
+	trace_ext4_remove_blocks(inode, ex, from, to, partial);
 
 	/*
-	 * For bigalloc file systems, we never free a partial cluster
-	 * at the beginning of the extent. Instead, we make a note
-	 * that we tried freeing the cluster, and check to see if we
-	 * need to free it on a subsequent call to ext4_remove_blocks,
-	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
+	 * if we have a partial cluster, and it's different from the
+	 * cluster of the last block in the extent, we free it
 	 */
-	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
+	last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
+
+	if (partial->state != initial &&
+	    partial->pclu != EXT4_B2C(sbi, last_pblk)) {
+		if (partial->state == tofree) {
+			flags = get_default_free_blocks_flags(inode);
+			if (ext4_is_pending(inode, partial->lblk))
+				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
+			ext4_free_blocks(handle, inode, NULL,
+					 EXT4_C2B(sbi, partial->pclu),
+					 sbi->s_cluster_ratio, flags);
+			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+				ext4_rereserve_cluster(inode, partial->lblk);
+		}
+		partial->state = initial;
+	}
+
+	num = le32_to_cpu(ex->ee_block) + ee_len - from;
+	pblk = ext4_ext_pblock(ex) + ee_len - num;
 
-	trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
 	/*
-	 * If we have a partial cluster, and it's different from the
-	 * cluster of the last block, we need to explicitly free the
-	 * partial cluster here.
+	 * We free the partial cluster at the end of the extent (if any),
+	 * unless the cluster is used by another extent (partial_cluster
+	 * state is nofree). If a partial cluster exists here, it must be
+	 * shared with the last block in the extent.
 	 */
-	pblk = ext4_ext_pblock(ex) + ee_len - 1;
-	if (*partial_cluster > 0 &&
-	    *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
+	flags = get_default_free_blocks_flags(inode);
+
+	/* partial, left end cluster aligned, right end unaligned */
+	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
+	    (EXT4_LBLK_CMASK(sbi, to) >= from) &&
+	    (partial->state != nofree)) {
+		if (ext4_is_pending(inode, to))
+			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
 		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(sbi, *partial_cluster),
+				 EXT4_PBLK_CMASK(sbi, last_pblk),
 				 sbi->s_cluster_ratio, flags);
-		*partial_cluster = 0;
+		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+			ext4_rereserve_cluster(inode, to);
+		partial->state = initial;
+		flags = get_default_free_blocks_flags(inode);
 	}
 
-#ifdef EXTENTS_STATS
-	{
-		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-		spin_lock(&sbi->s_ext_stats_lock);
-		sbi->s_ext_blocks += ee_len;
-		sbi->s_ext_extents++;
-		if (ee_len < sbi->s_ext_min)
-			sbi->s_ext_min = ee_len;
-		if (ee_len > sbi->s_ext_max)
-			sbi->s_ext_max = ee_len;
-		if (ext_depth(inode) > sbi->s_depth_max)
-			sbi->s_depth_max = ext_depth(inode);
-		spin_unlock(&sbi->s_ext_stats_lock);
-	}
-#endif
-	if (from >= le32_to_cpu(ex->ee_block)
-	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
-		/* tail removal */
-		ext4_lblk_t num;
-		long long first_cluster;
-
-		num = le32_to_cpu(ex->ee_block) + ee_len - from;
-		pblk = ext4_ext_pblock(ex) + ee_len - num;
-		/*
-		 * Usually we want to free partial cluster at the end of the
-		 * extent, except for the situation when the cluster is still
-		 * used by any other extent (partial_cluster is negative).
-		 */
-		if (*partial_cluster < 0 &&
-		    *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
-			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
+	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
 
-		ext_debug("free last %u blocks starting %llu partial %lld\n",
-			  num, pblk, *partial_cluster);
-		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
-		/*
-		 * If the block range to be freed didn't start at the
-		 * beginning of a cluster, and we removed the entire
-		 * extent and the cluster is not used by any other extent,
-		 * save the partial cluster here, since we might need to
-		 * delete if we determine that the truncate or punch hole
-		 * operation has removed all of the blocks in the cluster.
-		 * If that cluster is used by another extent, preserve its
-		 * negative value so it isn't freed later on.
-		 *
-		 * If the whole extent wasn't freed, we've reached the
-		 * start of the truncated/punched region and have finished
-		 * removing blocks. If there's a partial cluster here it's
-		 * shared with the remainder of the extent and is no longer
-		 * a candidate for removal.
-		 */
-		if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
-			first_cluster = (long long) EXT4_B2C(sbi, pblk);
-			if (first_cluster != -*partial_cluster)
-				*partial_cluster = first_cluster;
-		} else {
-			*partial_cluster = 0;
+	/*
+	 * For bigalloc file systems, we never free a partial cluster
+	 * at the beginning of the extent. Instead, we check to see if we
+	 * need to free it on a subsequent call to ext4_remove_blocks,
+	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
+	 */
+	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
+	ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
+
+	/* reset the partial cluster if we've freed past it */
+	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
+		partial->state = initial;
+
+	/*
+	 * If we've freed the entire extent but the beginning is not left
+	 * cluster aligned and is not marked as ineligible for freeing we
+	 * record the partial cluster at the beginning of the extent. It
+	 * wasn't freed by the preceding ext4_free_blocks() call, and we
+	 * need to look farther to the left to determine if it's to be freed
+	 * (not shared with another extent). Else, reset the partial
+	 * cluster - we're either done freeing or the beginning of the
+	 * extent is left cluster aligned.
+	 */
+	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
+		if (partial->state == initial) {
+			partial->pclu = EXT4_B2C(sbi, pblk);
+			partial->lblk = from;
+			partial->state = tofree;
 		}
-	} else
-		ext4_error(sbi->s_sb, "strange request: removal(2) "
-			   "%u-%u from %u:%u",
-			   from, to, le32_to_cpu(ex->ee_block), ee_len);
+	} else {
+		partial->state = initial;
+	}
+
 	return 0;
 }
 
-
 /*
  * ext4_ext_rm_leaf() Removes the extents associated with the
  * blocks appearing between "start" and "end". Both "start"
@@ -2608,7 +2659,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		 struct ext4_ext_path *path,
-		 long long *partial_cluster,
+		 struct partial_cluster *partial,
 		 ext4_lblk_t start, ext4_lblk_t end)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2640,7 +2691,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
-	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
+	trace_ext4_ext_rm_leaf(inode, start, ex, partial);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 	       ex_ee_block + ex_ee_len > start) {
@@ -2671,8 +2722,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			 */
 			if (sbi->s_cluster_ratio > 1) {
 				pblk = ext4_ext_pblock(ex);
-				*partial_cluster =
-					-(long long) EXT4_B2C(sbi, pblk);
+				partial->pclu = EXT4_B2C(sbi, pblk);
+				partial->state = nofree;
 			}
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2714,8 +2765,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (err)
 			goto out;
 
-		err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
-					 a, b);
+		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
 		if (err)
 			goto out;
 
@@ -2769,18 +2819,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	 * If there's a partial cluster and at least one extent remains in
 	 * the leaf, free the partial cluster if it isn't shared with the
 	 * current extent. If it is shared with the current extent
-	 * we zero partial_cluster because we've reached the start of the
+	 * we reset the partial cluster because we've reached the start of the
 	 * truncated/punched region and we're done removing blocks.
 	 */
-	if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
+	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
 		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
-		if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
+		if (partial->pclu != EXT4_B2C(sbi, pblk)) {
+			int flags = get_default_free_blocks_flags(inode);
+
+			if (ext4_is_pending(inode, partial->lblk))
+				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
 			ext4_free_blocks(handle, inode, NULL,
-					 EXT4_C2B(sbi, *partial_cluster),
-					 sbi->s_cluster_ratio,
-					 get_default_free_blocks_flags(inode));
+					 EXT4_C2B(sbi, partial->pclu),
+					 sbi->s_cluster_ratio, flags);
+			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+				ext4_rereserve_cluster(inode, partial->lblk);
 		}
-		*partial_cluster = 0;
+		partial->state = initial;
 	}
 
 	/* if this leaf is free, then we should
@@ -2819,10 +2874,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path = NULL;
-	long long partial_cluster = 0;
+	struct partial_cluster partial;
 	handle_t *handle;
 	int i = 0, err = 0;
 
+	partial.pclu = 0;
+	partial.lblk = 0;
+	partial.state = initial;
+
 	ext_debug("truncate since %u to %u\n", start, end);
 
 	/* probably first extent we're gonna free will be last in block */
@@ -2882,8 +2941,8 @@ again:
 			 */
 			if (sbi->s_cluster_ratio > 1) {
 				pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
-				partial_cluster =
-					-(long long) EXT4_B2C(sbi, pblk);
+				partial.pclu = EXT4_B2C(sbi, pblk);
+				partial.state = nofree;
 			}
 
 			/*
@@ -2911,9 +2970,10 @@ again:
 						    &ex);
 			if (err)
 				goto out;
-			if (pblk)
-				partial_cluster =
-					-(long long) EXT4_B2C(sbi, pblk);
+			if (pblk) {
+				partial.pclu = EXT4_B2C(sbi, pblk);
+				partial.state = nofree;
+			}
 		}
 	}
 	/*
@@ -2948,8 +3008,7 @@ again:
 		if (i == depth) {
 			/* this is leaf block */
 			err = ext4_ext_rm_leaf(handle, inode, path,
-					       &partial_cluster, start,
-					       end);
+					       &partial, start, end);
 			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
@@ -3021,21 +3080,24 @@ again:
 		}
 	}
 
-	trace_ext4_ext_remove_space_done(inode, start, end, depth,
-			partial_cluster, path->p_hdr->eh_entries);
+	trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
+					 path->p_hdr->eh_entries);
 
 	/*
-	 * If we still have something in the partial cluster and we have removed
-	 * even the first extent, then we should free the blocks in the partial
-	 * cluster as well. (This code will only run when there are no leaves
-	 * to the immediate left of the truncated/punched region.)
+	 * if there's a partial cluster and we have removed the first extent
+	 * in the file, then we also free the partial cluster, if any
 	 */
-	if (partial_cluster > 0 && err == 0) {
-		/* don't zero partial_cluster since it's not used afterwards */
+	if (partial.state == tofree && err == 0) {
+		int flags = get_default_free_blocks_flags(inode);
+
+		if (ext4_is_pending(inode, partial.lblk))
+			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
 		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(sbi, partial_cluster),
-				 sbi->s_cluster_ratio,
-				 get_default_free_blocks_flags(inode));
+				 EXT4_C2B(sbi, partial.pclu),
+				 sbi->s_cluster_ratio, flags);
+		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+			ext4_rereserve_cluster(inode, partial.lblk);
+		partial.state = initial;
 	}
 
 	/* TODO: flexible tree reduction should be here */
@@ -3819,114 +3881,6 @@ out:
 	return ext4_mark_inode_dirty(handle, inode);
 }
 
-/**
- * ext4_find_delalloc_range: find delayed allocated block in the given range.
- *
- * Return 1 if there is a delalloc block in the range, otherwise 0.
- */
-int ext4_find_delalloc_range(struct inode *inode,
-			     ext4_lblk_t lblk_start,
-			     ext4_lblk_t lblk_end)
-{
-	struct extent_status es;
-
-	ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
-	if (es.es_len == 0)
-		return 0; /* there is no delay extent in this tree */
-	else if (es.es_lblk <= lblk_start &&
-		 lblk_start < es.es_lblk + es.es_len)
-		return 1;
-	else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
-		return 1;
-	else
-		return 0;
-}
-
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	ext4_lblk_t lblk_start, lblk_end;
-	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
-	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
-
-	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
-}
-
-/**
- * Determines how many complete clusters (out of those specified by the 'map')
- * are under delalloc and were reserved quota for.
- * This function is called when we are writing out the blocks that were
- * originally written with their allocation delayed, but then the space was
- * allocated using fallocate() before the delayed allocation could be resolved.
- * The cases to look for are:
- * ('=' indicated delayed allocated blocks
- *  '-' indicates non-delayed allocated blocks)
- * (a) partial clusters towards beginning and/or end outside of allocated range
- *     are not delalloc'ed.
- *	Ex:
- *	|----c---=|====c====|====c====|===-c----|
- *	         |++++++ allocated ++++++|
- *	==> 4 complete clusters in above example
- *
- * (b) partial cluster (outside of allocated range) towards either end is
- *     marked for delayed allocation. In this case, we will exclude that
- *     cluster.
- *	Ex:
- *	|----====c========|========c========|
- *	     |++++++ allocated ++++++|
- *	==> 1 complete clusters in above example
- *
- *	Ex:
- *	|================c================|
- *	            |++++++ allocated ++++++|
- *	==> 0 complete clusters in above example
- *
- * The ext4_da_update_reserve_space will be called only if we
- * determine here that there were some "entire" clusters that span
- * this 'allocated' range.
- * In the non-bigalloc case, this function will just end up returning num_blks
- * without ever calling ext4_find_delalloc_range.
- */
-static unsigned int
-get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
-			   unsigned int num_blks)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
-	ext4_lblk_t lblk_from, lblk_to, c_offset;
-	unsigned int allocated_clusters = 0;
-
-	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
-	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
-
-	/* max possible clusters for this allocation */
-	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
-
-	trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
-
-	/* Check towards left side */
-	c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
-	if (c_offset) {
-		lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
-		lblk_to = lblk_from + c_offset - 1;
-
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
-			allocated_clusters--;
-	}
-
-	/* Now check towards right. */
-	c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
-	if (allocated_clusters && c_offset) {
-		lblk_from = lblk_start + num_blks;
-		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
-
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
-			allocated_clusters--;
-	}
-
-	return allocated_clusters;
-}
-
 static int
 convert_initialized_extent(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map,
@@ -4108,23 +4062,6 @@ out:
 	}
 	map->m_len = allocated;
 
-	/*
-	 * If we have done fallocate with the offset that is already
-	 * delayed allocated, we would have block reservation
-	 * and quota reservation done in the delayed write path.
-	 * But fallocate would have already updated quota and block
-	 * count for this offset. So cancel these reservation
-	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
-		unsigned int reserved_clusters;
-		reserved_clusters = get_reserved_cluster_alloc(inode,
-				map->m_lblk, map->m_len);
-		if (reserved_clusters)
-			ext4_da_update_reserve_space(inode,
-						     reserved_clusters,
-						     0);
-	}
-
 map_out:
 	map->m_flags |= EXT4_MAP_MAPPED;
 	if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
@@ -4513,77 +4450,39 @@ got_allocated_blocks:
 	map->m_flags |= EXT4_MAP_NEW;
 
 	/*
-	 * Update reserved blocks/metadata blocks after successful
-	 * block allocation which had been deferred till now.
+	 * Reduce the reserved cluster count to reflect successful deferred
+	 * allocation of delayed allocated clusters or direct allocation of
+	 * clusters discovered to be delayed allocated. Once allocated, a
+	 * cluster is not included in the reserved count.
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
-		unsigned int reserved_clusters;
-		/*
-		 * Check how many clusters we had reserved this allocated range
-		 */
-		reserved_clusters = get_reserved_cluster_alloc(inode,
-						map->m_lblk, allocated);
-		if (!map_from_cluster) {
-			BUG_ON(allocated_clusters < reserved_clusters);
-			if (reserved_clusters < allocated_clusters) {
-				struct ext4_inode_info *ei = EXT4_I(inode);
-				int reservation = allocated_clusters -
-						  reserved_clusters;
-				/*
-				 * It seems we claimed few clusters outside of
-				 * the range of this allocation. We should give
-				 * it back to the reservation pool. This can
-				 * happen in the following case:
-				 *
-				 * * Suppose s_cluster_ratio is 4 (i.e., each
-				 *   cluster has 4 blocks. Thus, the clusters
-				 *   are [0-3],[4-7],[8-11]...
-				 * * First comes delayed allocation write for
-				 *   logical blocks 10 & 11. Since there were no
-				 *   previous delayed allocated blocks in the
-				 *   range [8-11], we would reserve 1 cluster
-				 *   for this write.
-				 * * Next comes write for logical blocks 3 to 8.
-				 *   In this case, we will reserve 2 clusters
-				 *   (for [0-3] and [4-7]; and not for [8-11] as
-				 *   that range has a delayed allocated blocks.
-				 *   Thus total reserved clusters now becomes 3.
-				 * * Now, during the delayed allocation writeout
-				 *   time, we will first write blocks [3-8] and
-				 *   allocate 3 clusters for writing these
-				 *   blocks. Also, we would claim all these
-				 *   three clusters above.
-				 * * Now when we come here to writeout the
-				 *   blocks [10-11], we would expect to claim
-				 *   the reservation of 1 cluster we had made
-				 *   (and we would claim it since there are no
-				 *   more delayed allocated blocks in the range
-				 *   [8-11]. But our reserved cluster count had
-				 *   already gone to 0.
-				 *
-				 * Thus, at the step 4 above when we determine
-				 * that there are still some unwritten delayed
-				 * allocated blocks outside of our current
-				 * block range, we should increment the
-				 * reserved clusters count so that when the
-				 * remaining blocks finally gets written, we
-				 * could claim them.
-				 */
-				dquot_reserve_block(inode,
-						EXT4_C2B(sbi, reservation));
-				spin_lock(&ei->i_block_reservation_lock);
-				ei->i_reserved_data_blocks += reservation;
-				spin_unlock(&ei->i_block_reservation_lock);
-			}
+	if (test_opt(inode->i_sb, DELALLOC) && !map_from_cluster) {
+		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 			/*
-			 * We will claim quota for all newly allocated blocks.
-			 * We're updating the reserved space *after* the
-			 * correction above so we do not accidentally free
-			 * all the metadata reservation because we might
-			 * actually need it later on.
+			 * When allocating delayed allocated clusters, simply
+			 * reduce the reserved cluster count and claim quota
 			 */
 			ext4_da_update_reserve_space(inode, allocated_clusters,
 							1);
+		} else {
+			ext4_lblk_t lblk, len;
+			unsigned int n;
+
+			/*
+			 * When allocating non-delayed allocated clusters
+			 * (from fallocate, filemap, DIO, or clusters
+			 * allocated when delalloc has been disabled by
+			 * ext4_nonda_switch), reduce the reserved cluster
+			 * count by the number of allocated clusters that
+			 * have previously been delayed allocated. Quota
+			 * has been claimed by ext4_mb_new_blocks() above,
+			 * so release the quota reservations made for any
+			 * previously delayed allocated clusters.
+			 */
+			lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
+			len = allocated_clusters << sbi->s_cluster_bits;
+			n = ext4_es_delayed_clu(inode, lblk, len);
+			if (n > 0)
+				ext4_da_update_reserve_space(inode, (int) n, 0);
 		}
 	}
 
@@ -5075,8 +4974,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	ext4_lblk_t block, next_del;
 
 	if (newes->es_pblk == 0) {
-		ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
-				newes->es_lblk + newes->es_len - 1, &es);
+		ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
+					  newes->es_lblk,
+					  newes->es_lblk + newes->es_len - 1,
+					  &es);
 
 		/*
 		 * No extent in extent-tree contains block @newes->es_pblk,
@@ -5097,7 +4998,8 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	}
 
 	block = newes->es_lblk + newes->es_len;
-	ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
+	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, block,
+				  EXT_MAX_BLOCKS, &es);
 	if (es.es_len == 0)
 		next_del = EXT_MAX_BLOCKS;
 	else
@@ -5958,3 +5860,82 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
 	}
 	return replaced_count;
 }
+
+/*
+ * ext4_clu_mapped - determine whether any block in a logical cluster has
+ *                   been mapped to a physical cluster
+ *
+ * @inode - file containing the logical cluster
+ * @lclu - logical cluster of interest
+ *
+ * Returns 1 if any block in the logical cluster is mapped, signifying
+ * that a physical cluster has been allocated for it. Otherwise,
+ * returns 0. Can also return negative error codes. Derived from
+ * ext4_ext_map_blocks().
+ */
+int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_ext_path *path;
+	int depth, mapped = 0, err = 0;
+	struct ext4_extent *extent;
+	ext4_lblk_t first_lblk, first_lclu, last_lclu;
+
+	/* search for the extent closest to the first block in the cluster */
+	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+
+	depth = ext_depth(inode);
+
+	/*
+	 * A consistent leaf must not be empty. This situation is possible,
+	 * though, _during_ tree modification, and it's why an assert can't
+	 * be put in ext4_find_extent().
+	 */
+	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+		EXT4_ERROR_INODE(inode,
+		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
+				 (unsigned long) EXT4_C2B(sbi, lclu),
+				 depth, path[depth].p_block);
+		err = -EFSCORRUPTED;
+		goto out;
+	}
+
+	extent = path[depth].p_ext;
+
+	/* can't be mapped if the extent tree is empty */
+	if (extent == NULL)
+		goto out;
+
+	first_lblk = le32_to_cpu(extent->ee_block);
+	first_lclu = EXT4_B2C(sbi, first_lblk);
+
+	/*
+	 * Three possible outcomes at this point - found extent spanning
+	 * the target cluster, to the left of the target cluster, or to the
+	 * right of the target cluster. The first two cases are handled here.
+	 * The last case indicates the target cluster is not mapped.
+	 */
+	if (lclu >= first_lclu) {
+		last_lclu = EXT4_B2C(sbi, first_lblk +
+				     ext4_ext_get_actual_len(extent) - 1);
+		if (lclu <= last_lclu) {
+			mapped = 1;
+		} else {
+			first_lblk = ext4_ext_next_allocated_block(path);
+			first_lclu = EXT4_B2C(sbi, first_lblk);
+			if (lclu == first_lclu)
+				mapped = 1;
+		}
+	}
+
+out:
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	return err ? err : mapped;
+}
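
For reference, the hunks above track partial-cluster bookkeeping through a small structure with three states (initial, tofree, nofree). Its definition is not part of this excerpt; a minimal sketch of what it presumably looks like, inferred from how partial->pclu, partial->lblk and partial->state are used here, is:

/*
 * Sketch (assumption, not shown in this excerpt): the partial_cluster
 * bookkeeping manipulated by ext4_remove_blocks()/ext4_ext_rm_leaf().
 *   initial - no partial cluster is currently being tracked
 *   tofree  - a candidate partial cluster was recorded and may be freed
 *             once we know no extent to its left still uses it
 *   nofree  - the cluster is known to be shared and must not be freed
 */
struct partial_cluster {
	ext4_fsblk_t pclu;	/* physical cluster number */
	ext4_lblk_t lblk;	/* logical block within the logical cluster */
	enum {initial, tofree, nofree} state;
};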