@@ -4986,24 +4986,40 @@ static void evict_inode_truncate_pages(struct inode *inode)
 	}
 	write_unlock(&map_tree->lock);
 
+	/*
+	 * Keep looping until we have no more ranges in the io tree.
+	 * We can have ongoing bios started by readpages (called from readahead)
+	 * that didn't get their end io callbacks called yet or they are still
+	 * in progress (extent_io.c:end_bio_extent_readpage()). This means some
+	 * ranges can still be locked and eviction started because before
+	 * submitting those bios, which are executed by a separate task (work
+	 * queue kthread), inode references (inode->i_count) were not taken
+	 * (which would be dropped in the end io callback of each bio).
+	 * Therefore here we effectively end up waiting for those bios and
+	 * anyone else holding locked ranges without having bumped the inode's
+	 * reference count - if we don't do it, when they access the inode's
+	 * io_tree to unlock a range it may be too late, leading to a
+	 * use-after-free issue.
+	 */
 	spin_lock(&io_tree->lock);
 	while (!RB_EMPTY_ROOT(&io_tree->state)) {
 		struct extent_state *state;
 		struct extent_state *cached_state = NULL;
+		u64 start;
+		u64 end;
 
 		node = rb_first(&io_tree->state);
 		state = rb_entry(node, struct extent_state, rb_node);
-		atomic_inc(&state->refs);
+		start = state->start;
+		end = state->end;
 		spin_unlock(&io_tree->lock);
 
-		lock_extent_bits(io_tree, state->start, state->end,
-				 0, &cached_state);
-		clear_extent_bit(io_tree, state->start, state->end,
+		lock_extent_bits(io_tree, start, end, 0, &cached_state);
+		clear_extent_bit(io_tree, start, end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
 				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
 				 EXTENT_DEFRAG, 1, 1,
 				 &cached_state, GFP_NOFS);
-		free_extent_state(state);
 
 		cond_resched();
 		spin_lock(&io_tree->lock);