@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 static int btrfs_dirty_inode(struct inode *inode);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 				     struct inode *inode, struct inode *dir,
 				     const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
 		u64 new_size;
 
 		/*
-		 * We need the largest size of the remaining extent to see if we
-		 * need to add a new outstanding extent. Think of the following
-		 * case
-		 *
-		 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
-		 *
-		 * The new_size would just be 4k and we'd think we had enough
-		 * outstanding extents for this if we only took one side of the
-		 * split, same goes for the other direction. We need to see if
-		 * the larger size still is the same amount of extents as the
-		 * original size, because if it is we need to add a new
-		 * outstanding extent. But if we split up and the larger size
-		 * is less than the original then we are good to go since we've
-		 * already accounted for the extra extent in our original
-		 * accounting.
+		 * See the explanation in btrfs_merge_extent_hook, the same
+		 * applies here, just in reverse.
 		 */
 		new_size = orig->end - split + 1;
-		if ((split - orig->start) > new_size)
-			new_size = split - orig->start;
-
-		num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+		num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
 					BTRFS_MAX_EXTENT_SIZE);
-		if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-			      BTRFS_MAX_EXTENT_SIZE) < num_extents)
+		new_size = split - orig->start;
+		num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+					 BTRFS_MAX_EXTENT_SIZE);
+		if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+			      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
 			return;
 	}
 
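
To sanity-check the arithmetic the reworked split hook now does, the ceil-division can be exercised on its own. The snippet below is a standalone userspace sketch, not btrfs code: the 128M value stands in for BTRFS_MAX_EXTENT_SIZE, and extents_for()/split_needs_new_extent() are made-up helper names for illustration.

#include <stdio.h>
#include <stdint.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)	/* assumed BTRFS_MAX_EXTENT_SIZE */

/* outstanding extents a delalloc range of 'size' bytes is charged for */
static uint64_t extents_for(uint64_t size)
{
	return (size + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

/* mirrors the new hook: does splitting 'size' at offset 'left' need one more extent? */
static int split_needs_new_extent(uint64_t size, uint64_t left)
{
	return extents_for(left) + extents_for(size - left) > extents_for(size);
}

int main(void)
{
	/* 256M split into 128M + 128M: 1 + 1 == 2 already accounted, nothing new */
	printf("%d\n", split_needs_new_extent(2 * MAX_EXTENT_SIZE, MAX_EXTENT_SIZE));
	/* 128M+8k split into 128M+4k and 4k: 2 + 1 > 2, one more extent is needed */
	printf("%d\n", split_needs_new_extent(MAX_EXTENT_SIZE + 8192, MAX_EXTENT_SIZE + 4096));
	return 0;
}
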
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 	if (!(other->state & EXTENT_DELALLOC))
 		return;
 
-	old_size = other->end - other->start + 1;
-	new_size = old_size + (new->end - new->start + 1);
+	if (new->start > other->start)
+		new_size = new->end - other->start + 1;
+	else
+		new_size = other->end - new->start + 1;
 
 	/* we're not bigger than the max, unreserve the space and go */
 	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
 	}
 
 	/*
-	 * If we grew by another max_extent, just return, we want to keep that
-	 * reserved amount.
+	 * We have to add up either side to figure out how many extents were
+	 * accounted for before we merged into one big extent. If the number of
+	 * extents we accounted for is <= the amount we need for the new range
+	 * then we can return, otherwise drop. Think of it like this
+	 *
+	 * [ 4k][MAX_SIZE]
+	 *
+	 * So we've grown the extent by a MAX_SIZE extent, this would mean we
+	 * need 2 outstanding extents, on one side we have 1 and the other side
+	 * we have 1 so they are == and we can return. But in this case
+	 *
+	 * [MAX_SIZE+4k][MAX_SIZE+4k]
+	 *
+	 * Each range on their own accounts for 2 extents, but merged together
+	 * they are only 3 extents worth of accounting, so we need to drop in
+	 * this case.
 	 */
+	old_size = other->end - other->start + 1;
 	num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
 				BTRFS_MAX_EXTENT_SIZE);
+	old_size = new->end - new->start + 1;
+	num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+				 BTRFS_MAX_EXTENT_SIZE);
+
 	if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-		      BTRFS_MAX_EXTENT_SIZE) > num_extents)
+		      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
 		return;
 
 	spin_lock(&BTRFS_I(inode)->lock);
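
The two layouts named in the new comment can be worked through numerically the same way. Again a standalone userspace sketch under the assumed 128M BTRFS_MAX_EXTENT_SIZE, with made-up helper names rather than btrfs interfaces.

#include <stdio.h>
#include <stdint.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)	/* assumed BTRFS_MAX_EXTENT_SIZE */
#define SZ_4K 4096ULL

static uint64_t extents_for(uint64_t size)
{
	return (size + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

/* mirrors the merge hook: can we keep everything both sides had accounted? */
static void merge_case(const char *name, uint64_t a, uint64_t b)
{
	uint64_t accounted = extents_for(a) + extents_for(b);
	uint64_t needed = extents_for(a + b);

	printf("%s: accounted %llu, merged range needs %llu -> %s\n",
	       name, (unsigned long long)accounted, (unsigned long long)needed,
	       needed >= accounted ? "keep" : "drop one");
}

int main(void)
{
	/* [ 4k][MAX_SIZE]: 1 + 1 accounted, the merged range still needs 2 */
	merge_case("[ 4k][MAX_SIZE]", SZ_4K, MAX_EXTENT_SIZE);
	/* [MAX_SIZE+4k][MAX_SIZE+4k]: 2 + 2 accounted, the merged range needs 3 */
	merge_case("[MAX+4k][MAX+4k]", MAX_EXTENT_SIZE + SZ_4K, MAX_EXTENT_SIZE + SZ_4K);
	return 0;
}
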
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
 
+		/* For sanity tests */
+		if (btrfs_test_is_dummy_root(root))
+			return;
+
 		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
 				     root->fs_info->delalloc_batch);
 		spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 		    root != root->fs_info->tree_root)
 			btrfs_delalloc_release_metadata(inode, len);
 
+		/* For sanity tests. */
+		if (btrfs_test_is_dummy_root(root))
+			return;
+
 		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
 		    && do_list && !(state->state & EXTENT_NORESERVE))
 			btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	u64 start = iblock << inode->i_blkbits;
 	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
-	u64 orig_len = len;
+	u64 *outstanding_extents = NULL;
 	int unlock_bits = EXTENT_LOCKED;
 	int ret = 0;
 
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	lockstart = start;
 	lockend = start + len - 1;
 
+	if (current->journal_info) {
+		/*
+		 * Need to pull our outstanding extents and set journal_info to NULL so
+		 * that anything that needs to check if there's a transaction doesn't get
+		 * confused.
+		 */
+		outstanding_extents = current->journal_info;
+		current->journal_info = NULL;
+	}
+
 	/*
 	 * If this errors out it's because we couldn't invalidate pagecache for
 	 * this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
 		if (start + len > i_size_read(inode))
 			i_size_write(inode, start + len);
 
-		if (len < orig_len) {
+		/*
+		 * If we have an outstanding_extents count still set then we're
+		 * within our reservation, otherwise we need to adjust our inode
+		 * counter appropriately.
+		 */
+		if (*outstanding_extents) {
+			(*outstanding_extents)--;
+		} else {
 			spin_lock(&BTRFS_I(inode)->lock);
 			BTRFS_I(inode)->outstanding_extents++;
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
+
+		current->journal_info = outstanding_extents;
 		btrfs_free_reserved_data_space(inode, len);
 	}
 
@@ -7376,6 +7418,8 @@ unlock:
 unlock_err:
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	if (outstanding_extents)
+		current->journal_info = outstanding_extents;
 	return ret;
 }
 
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	u64 outstanding_extents = 0;
 	size_t count = 0;
 	int flags = 0;
 	bool wakeup = true;
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		ret = btrfs_delalloc_reserve_space(inode, count);
 		if (ret)
 			goto out;
+		outstanding_extents = div64_u64(count +
+						BTRFS_MAX_EXTENT_SIZE - 1,
+						BTRFS_MAX_EXTENT_SIZE);
+
+		/*
+		 * We need to know how many extents we reserved so that we can
+		 * do the accounting properly if we go over the number we
+		 * originally calculated. Abuse current->journal_info for this.
+		 */
+		current->journal_info = &outstanding_extents;
 	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 			    &BTRFS_I(inode)->runtime_flags)) {
 		inode_dio_done(inode);
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			iter, offset, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
+		current->journal_info = NULL;
 		if (ret < 0 && ret != -EIOCBQUEUED)
 			btrfs_delalloc_release_space(inode, count);
 		else if (ret >= 0 && (size_t)ret < count)
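
Taken together, the btrfs_direct_IO and btrfs_get_blocks_direct hunks implement a small hand-off through current->journal_info: the write path stashes how many extents the reservation covered, and each extent mapped for the DIO either consumes one of those or bumps the inode's outstanding_extents. The sketch below is a compressed userspace model of that idea only; every name in it is invented for illustration, the 128M value is the assumed BTRFS_MAX_EXTENT_SIZE, and the real code stashes and restores the pointer once per btrfs_get_blocks_direct call.

#include <stdio.h>
#include <stdint.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)	/* assumed BTRFS_MAX_EXTENT_SIZE */

static uint64_t *journal_info;	/* stands in for current->journal_info */
static uint64_t inode_outstanding_extents;

/* btrfs_direct_IO side: reserve and remember how many extents that covered */
static void dio_start(uint64_t count, uint64_t *reserved)
{
	*reserved = (count + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
	journal_info = reserved;
}

/* btrfs_get_blocks_direct side: called once per extent actually mapped */
static void map_one_extent(void)
{
	uint64_t *reserved = journal_info;

	journal_info = NULL;		/* mirrors the patch: don't look like a transaction */
	if (*reserved)
		(*reserved)--;		/* still covered by the original reservation */
	else
		inode_outstanding_extents++;	/* went over, account one more */
	journal_info = reserved;	/* restore for the next call */
}

int main(void)
{
	uint64_t reserved;
	int i;

	dio_start(3 * MAX_EXTENT_SIZE, &reserved);	/* reservation covers 3 extents */
	for (i = 0; i < 5; i++)				/* but 5 extents get mapped */
		map_one_extent();
	journal_info = NULL;		/* what btrfs_direct_IO does once the write is done */

	printf("extra outstanding extents: %llu\n",
	       (unsigned long long)inode_outstanding_extents);	/* prints 2 */
	return 0;
}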