@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_lblk_t end);
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan);
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei);
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei);
 
 int __init ext4_init_es(void)
 {
@@ -298,6 +297,36 @@ out:
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
+static void ext4_es_list_add(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!list_empty(&ei->i_es_list))
+		return;
+
+	spin_lock(&sbi->s_es_lock);
+	if (list_empty(&ei->i_es_list)) {
+		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
+		sbi->s_es_nr_inode++;
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
+static void ext4_es_list_del(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	spin_lock(&sbi->s_es_lock);
+	if (!list_empty(&ei->i_es_list)) {
+		list_del_init(&ei->i_es_list);
+		sbi->s_es_nr_inode--;
+		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
 static struct extent_status *
 ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 		     ext4_fsblk_t pblk)
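Note on the two list helpers added above: ext4_es_list_add() tests i_es_list without s_es_lock on the fast path and repeats the test under the lock before linking the inode, so concurrent callers cannot double-add. Below is a minimal userspace sketch of that check/lock/re-check idiom, not part of the patch; node_t, g_list and list_add_once() are made-up names, a pthread mutex stands in for the spinlock, and a deliberately racy boolean stands in for the unlocked list_empty() hint.

#include <pthread.h>
#include <stdbool.h>

typedef struct node {
	struct node *next;
	bool queued;
} node_t;

static node_t *g_list;					/* list head */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;

static void list_add_once(node_t *n)
{
	if (n->queued)		/* unlocked hint, like list_empty() above */
		return;

	pthread_mutex_lock(&g_lock);
	if (!n->queued) {	/* re-check now that we hold the lock */
		n->next = g_list;
		g_list = n;
		n->queued = true;
	}
	pthread_mutex_unlock(&g_lock);
}

int main(void)
{
	node_t a = { 0 };

	list_add_once(&a);
	list_add_once(&a);	/* second call returns on the fast path */
	return 0;
}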
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_lru_nr++;
+		if (!EXT4_I(inode)->i_es_shk_nr++)
+			ext4_es_list_add(inode);
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	EXT4_I(inode)->i_es_all_nr++;
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	EXT4_I(inode)->i_es_all_nr--;
 	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
 
-	/* Decrease the lru counter when this es is not delayed */
+	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
-		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
-		EXT4_I(inode)->i_es_lru_nr--;
+		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
+		if (!--EXT4_I(inode)->i_es_shk_nr)
+			ext4_es_list_del(inode);
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 static int ext4_es_can_be_merged(struct extent_status *es1,
 				 struct extent_status *es2)
 {
-	if (ext4_es_status(es1) != ext4_es_status(es2))
+	if (ext4_es_type(es1) != ext4_es_type(es2))
 		return 0;
 
 	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
 		es1 = rb_entry(node, struct extent_status, rb_node);
 		if (ext4_es_can_be_merged(es1, es)) {
 			es1->es_len += es->es_len;
+			if (ext4_es_is_referenced(es))
+				ext4_es_set_referenced(es1);
 			rb_erase(&es->rb_node, &tree->root);
 			ext4_es_free_extent(inode, es);
 			es = es1;
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
 		es1 = rb_entry(node, struct extent_status, rb_node);
 		if (ext4_es_can_be_merged(es, es1)) {
 			es->es_len += es1->es_len;
+			if (ext4_es_is_referenced(es1))
+				ext4_es_set_referenced(es);
 			rb_erase(node, &tree->root);
 			ext4_es_free_extent(inode, es1);
 		}
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 		goto error;
 retry:
 	err = __es_insert_extent(inode, &newes);
-	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					       EXT4_I(inode)))
+	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
+					  128, EXT4_I(inode)))
 		goto retry;
 	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
 		err = 0;
@@ -782,6 +817,8 @@ out:
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
+		if (!ext4_es_is_referenced(es))
+			ext4_es_set_referenced(es);
 		stats->es_stats_cache_hits++;
 	} else {
 		stats->es_stats_cache_misses++;
@@ -841,8 +878,8 @@ retry:
 			es->es_lblk = orig_es.es_lblk;
 			es->es_len = orig_es.es_len;
 			if ((err == -ENOMEM) &&
-			    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					     EXT4_I(inode)))
+			    __es_shrink(EXT4_SB(inode->i_sb),
+					128, EXT4_I(inode)))
 				goto retry;
 			goto out;
 		}
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	end = lblk + len - 1;
 	BUG_ON(end < lblk);
 
+	/*
+	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
+	 * so that we are sure __es_shrink() is done with the inode before it
+	 * is reclaimed.
+	 */
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	return err;
 }
 
-static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
-				     struct list_head *b)
-{
-	struct ext4_inode_info *eia, *eib;
-	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
-	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
-
-	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return 1;
-	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return -1;
-	if (eia->i_touch_when == eib->i_touch_when)
-		return 0;
-	if (time_after(eia->i_touch_when, eib->i_touch_when))
-		return 1;
-	else
-		return -1;
-}
-
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei)
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
 	struct ext4_es_stats *es_stats;
-	struct list_head *cur, *tmp;
-	LIST_HEAD(skipped);
 	ktime_t start_time;
 	u64 scan_time;
+	int nr_to_walk;
 	int nr_shrunk = 0;
-	int retried = 0, skip_precached = 1, nr_skipped = 0;
+	int retried = 0, nr_skipped = 0;
 
 	es_stats = &sbi->s_es_stats;
 	start_time = ktime_get();
-	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
-	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
-		int shrunk;
-
-		/*
-		 * If we have already reclaimed all extents from extent
-		 * status tree, just stop the loop immediately.
-		 */
-		if (percpu_counter_read_positive(
-				&es_stats->es_stats_lru_cnt) == 0)
-			break;
-
-		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+	spin_lock(&sbi->s_es_lock);
+	nr_to_walk = sbi->s_es_nr_inode;
+	while (nr_to_walk-- > 0) {
+		if (list_empty(&sbi->s_es_list)) {
+			spin_unlock(&sbi->s_es_lock);
+			goto out;
+		}
+		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
+				      i_es_list);
+		/* Move the inode to the tail */
+		list_move_tail(&ei->i_es_list, &sbi->s_es_list);
 
 		/*
-		 * Skip the inode that is newer than the last_sorted
-		 * time.  Normally we try hard to avoid shrinking
-		 * precached inodes, but we will as a last resort.
+		 * Normally we try hard to avoid shrinking precached inodes,
+		 * but we will as a last resort.
 		 */
-		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
-		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
-						EXT4_STATE_EXT_PRECACHED))) {
+		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
+						EXT4_STATE_EXT_PRECACHED)) {
 			nr_skipped++;
-			list_move_tail(cur, &skipped);
 			continue;
 		}
 
-		if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
-		    !write_trylock(&ei->i_es_lock))
+		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
+			nr_skipped++;
 			continue;
+		}
+		/*
+		 * Now we hold i_es_lock which protects us from inode reclaim
+		 * freeing inode under us
+		 */
+		spin_unlock(&sbi->s_es_lock);
 
-		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
-		if (ei->i_es_lru_nr == 0)
-			list_del_init(&ei->i_es_lru);
+		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
 		write_unlock(&ei->i_es_lock);
 
-		nr_shrunk += shrunk;
-		nr_to_scan -= shrunk;
-		if (nr_to_scan == 0)
-			break;
+		if (nr_to_scan <= 0)
+			goto out;
+		spin_lock(&sbi->s_es_lock);
 	}
-
-	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skipped, &sbi->s_es_lru);
-	INIT_LIST_HEAD(&skipped);
+	spin_unlock(&sbi->s_es_lock);
 
 	/*
 	 * If we skipped any inodes, and we weren't able to make any
-	 * forward progress, sort the list and try again.
+	 * forward progress, try again to scan precached inodes.
 	 */
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		es_stats->es_stats_last_sorted = jiffies;
-		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
-				      i_es_lru);
-		/*
-		 * If there are no non-precached inodes left on the
-		 * list, start releasing precached extents.
-		 */
-		if (ext4_test_inode_state(&ei->vfs_inode,
-					  EXT4_STATE_EXT_PRECACHED))
-			skip_precached = 0;
 		goto retry;
 	}
 
-	spin_unlock(&sbi->s_es_lru_lock);
-
 	if (locked_ei && nr_shrunk == 0)
-		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
 
+out:
 	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
 	if (likely(es_stats->es_stats_scan_time))
 		es_stats->es_stats_scan_time = (scan_time +
@@ -1043,7 +1046,7 @@ retry:
 	else
 		es_stats->es_stats_shrunk = nr_shrunk;
 
-	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
 			     nr_skipped, retried);
 	return nr_shrunk;
 }
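The rewritten __es_shrink() above replaces the sorted LRU walk with a round-robin pass: it takes the first inode off sbi->s_es_list, rotates it to the tail, reclaims from it, and bounds the pass both by the nr_to_scan budget and by a snapshot of s_es_nr_inode (nr_to_walk). The standalone C sketch below models only that rotation-plus-budget loop and ignores the precached and trylock skip logic; inode_t, reclaim_some() and the array-based queue are illustrative stand-ins, not kernel APIs.

#include <stdio.h>

#define NR_INODES 4

typedef struct {
	int id;
	int reclaimable;		/* extents that could be dropped */
} inode_t;

/* Pretend each visit reclaims at most 2 extents, capped by the budget. */
static int reclaim_some(inode_t *ei, int budget)
{
	int n = ei->reclaimable < 2 ? ei->reclaimable : 2;

	if (n > budget)
		n = budget;
	ei->reclaimable -= n;
	return n;
}

int main(void)
{
	inode_t inodes[NR_INODES] = { {1, 3}, {2, 0}, {3, 5}, {4, 1} };
	int head = 0;			/* front of the "list" */
	int nr_to_scan = 6;		/* scan budget for this pass */
	int nr_to_walk = NR_INODES;	/* snapshot bounds a single pass */
	int nr_shrunk = 0;

	while (nr_to_walk-- > 0 && nr_to_scan > 0) {
		inode_t *ei = &inodes[head];
		int shrunk = reclaim_some(ei, nr_to_scan);

		nr_shrunk += shrunk;
		nr_to_scan -= shrunk;
		head = (head + 1) % NR_INODES;	/* rotate inode to the tail */
	}
	printf("reclaimed %d extents\n", nr_shrunk);
	return 0;
}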
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
-	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 		return 0;
 
 	/* here we just find an inode that has the max nr. of objects */
-	spin_lock(&sbi->s_es_lru_lock);
-	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+	spin_lock(&sbi->s_es_lock);
+	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
 		inode_cnt++;
 		if (max && max->i_es_all_nr < ei->i_es_all_nr)
 			max = ei;
 		else if (!max)
 			max = ei;
 	}
-	spin_unlock(&sbi->s_es_lru_lock);
+	spin_unlock(&sbi->s_es_lock);
 
 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
-		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
 	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
 		   es_stats->es_stats_cache_hits,
 		   es_stats->es_stats_cache_misses);
-	if (es_stats->es_stats_last_sorted != 0)
-		seq_printf(seq, "  %u ms last sorted interval\n",
-			   jiffies_to_msecs(jiffies -
-					    es_stats->es_stats_last_sorted));
 	if (inode_cnt)
-		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
 	seq_printf(seq, "average:\n  %llu us scan time\n",
 		   div_u64(es_stats->es_stats_scan_time, 1000));
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
 		   "  %llu us max scan time\n",
-		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
 		   div_u64(es_stats->es_stats_max_scan_time, 1000));
 
 	return 0;
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 {
 	int err;
 
-	INIT_LIST_HEAD(&sbi->s_es_lru);
-	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_stats.es_stats_last_sorted = 0;
+	/* Make sure we have enough bits for physical block number */
+	BUILD_BUG_ON(ES_SHIFT < 48);
+	INIT_LIST_HEAD(&sbi->s_es_list);
+	sbi->s_es_nr_inode = 0;
+	spin_lock_init(&sbi->s_es_lock);
 	sbi->s_es_stats.es_stats_shrunk = 0;
 	sbi->s_es_stats.es_stats_cache_hits = 0;
 	sbi->s_es_stats.es_stats_cache_misses = 0;
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
 	if (err)
 		return err;
-	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
 	if (err)
 		goto err1;
 
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	return 0;
 
 err2:
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	return err;
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 	if (sbi->s_proc)
 		remove_proc_entry("es_shrinker_info", sbi->s_proc);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
-void ext4_es_lru_add(struct inode *inode)
+/*
+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
+ * most *nr_to_scan extents, update *nr_to_scan accordingly.
+ *
+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
+ * Increment *nr_shrunk by the number of reclaimed extents. Also update
+ * ei->i_es_shrink_lblk to where we should continue scanning.
+ */
+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
+				 int *nr_to_scan, int *nr_shrunk)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	ei->i_touch_when = jiffies;
-
-	if (!list_empty(&ei->i_es_lru))
-		return;
+	struct inode *inode = &ei->vfs_inode;
+	struct ext4_es_tree *tree = &ei->i_es_tree;
+	struct extent_status *es;
+	struct rb_node *node;
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (list_empty(&ei->i_es_lru))
-		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
+	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
+	if (!es)
+		goto out_wrap;
+	node = &es->rb_node;
+	while (*nr_to_scan > 0) {
+		if (es->es_lblk > end) {
+			ei->i_es_shrink_lblk = end + 1;
+			return 0;
+		}
 
-void ext4_es_lru_del(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+		(*nr_to_scan)--;
+		node = rb_next(&es->rb_node);
+		/*
+		 * We can't reclaim delayed extent from status tree because
+		 * fiemap, bigallic, and seek_data/hole need to use it.
+		 */
+		if (ext4_es_is_delayed(es))
+			goto next;
+		if (ext4_es_is_referenced(es)) {
+			ext4_es_clear_referenced(es);
+			goto next;
+		}
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (!list_empty(&ei->i_es_lru))
-		list_del_init(&ei->i_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(inode, es);
+		(*nr_shrunk)++;
+next:
+		if (!node)
+			goto out_wrap;
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+	ei->i_es_shrink_lblk = es->es_lblk;
+	return 1;
+out_wrap:
+	ei->i_es_shrink_lblk = 0;
+	return 0;
 }
 
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan)
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
 {
 	struct inode *inode = &ei->vfs_inode;
-	struct ext4_es_tree *tree = &ei->i_es_tree;
-	struct rb_node *node;
-	struct extent_status *es;
-	unsigned long nr_shrunk = 0;
+	int nr_shrunk = 0;
+	ext4_lblk_t start = ei->i_es_shrink_lblk;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if (ei->i_es_lru_nr == 0)
+	if (ei->i_es_shk_nr == 0)
 		return 0;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
 	    __ratelimit(&_rs))
 		ext4_warning(inode->i_sb, "forced shrink of precached extents");
 
-	node = rb_first(&tree->root);
-	while (node != NULL) {
-		es = rb_entry(node, struct extent_status, rb_node);
-		node = rb_next(&es->rb_node);
-		/*
-		 * We can't reclaim delayed extent from status tree because
-		 * fiemap, bigallic, and seek_data/hole need to use it.
-		 */
-		if (!ext4_es_is_delayed(es)) {
-			rb_erase(&es->rb_node, &tree->root);
-			ext4_es_free_extent(inode, es);
-			nr_shrunk++;
-			if (--nr_to_scan == 0)
-				break;
-		}
-	}
-	tree->cache_es = NULL;
+	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
+	    start != 0)
		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
+
+	ei->i_es_tree.cache_es = NULL;
 	return nr_shrunk;
 }
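The new es_reclaim_extents()/es_do_reclaim_extents() pair above keeps a per-inode cursor (i_es_shrink_lblk): one leg scans from the cursor to EXT_MAX_BLOCKS, and only if that leg reaches the end with budget left, and the scan did not start at block 0, does a second leg cover [0, start - 1]. Below is a standalone sketch of that wrap-around scan over a plain array, not part of the patch; scan_leg() is an invented helper and array indices stand in for logical block numbers.

#include <stdio.h>

#define NR_SLOTS 10

static int cursor;			/* plays the role of i_es_shrink_lblk */

/*
 * Scan slots[cursor..end], spending one unit of budget per slot and
 * counting slots marked reclaimable.  Return 1 if the budget ran out
 * (cursor remembers where to resume next pass), 0 if the end of the
 * interval was reached (cursor wraps back to 0).
 */
static int scan_leg(const int *slots, int end, int *budget, int *reclaimed)
{
	int i;

	for (i = cursor; i <= end; i++) {
		if (*budget == 0) {
			cursor = i;
			return 1;
		}
		(*budget)--;
		*reclaimed += slots[i];
	}
	cursor = 0;
	return 0;
}

int main(void)
{
	int slots[NR_SLOTS] = { 1, 0, 1, 1, 0, 0, 1, 0, 1, 1 };
	int budget = 4, reclaimed = 0;
	int start = cursor;

	/* First leg runs to the end; wrap only if budget remains. */
	if (!scan_leg(slots, NR_SLOTS - 1, &budget, &reclaimed) && start != 0)
		scan_leg(slots, start - 1, &budget, &reclaimed);
	printf("cursor=%d reclaimed=%d\n", cursor, reclaimed);
	return 0;
}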