|
@@ -492,11 +492,19 @@ insert:
|
|
|
|
|
|
if (btrfs_inode_generation(eb, src_item) == 0) {
|
|
|
struct extent_buffer *dst_eb = path->nodes[0];
|
|
|
+ const u64 ino_size = btrfs_inode_size(eb, src_item);
|
|
|
|
|
|
+ /*
|
|
|
+ * For regular files an ino_size == 0 is used only when
|
|
|
+ * logging that an inode exists, as part of a directory
|
|
|
+ * fsync, and the inode wasn't fsynced before. In this
|
|
|
+ * case don't set the size of the inode in the fs/subvol
|
|
|
+ * tree, otherwise we would be throwing valid data away.
|
|
|
+ */
|
|
|
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
|
|
|
- S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
|
|
|
+ S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
|
|
|
+ ino_size != 0) {
|
|
|
struct btrfs_map_token token;
|
|
|
- u64 ino_size = btrfs_inode_size(eb, src_item);
|
|
|
|
|
|
btrfs_init_map_token(&token);
|
|
|
btrfs_set_token_inode_size(dst_eb, dst_item,
|
|
@@ -3124,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_root *root, struct inode *inode,
|
|
|
struct btrfs_path *path,
|
|
|
struct btrfs_path *dst_path, int key_type,
|
|
|
+ struct btrfs_log_ctx *ctx,
|
|
|
u64 min_offset, u64 *last_offset_ret)
|
|
|
{
|
|
|
struct btrfs_key min_key;
|
|
@@ -3208,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
|
|
|
src = path->nodes[0];
|
|
|
nritems = btrfs_header_nritems(src);
|
|
|
for (i = path->slots[0]; i < nritems; i++) {
|
|
|
+ struct btrfs_dir_item *di;
|
|
|
+
|
|
|
btrfs_item_key_to_cpu(src, &min_key, i);
|
|
|
|
|
|
if (min_key.objectid != ino || min_key.type != key_type)
|
|
@@ -3218,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
|
|
|
err = ret;
|
|
|
goto done;
|
|
|
}
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We must make sure that when we log a directory entry,
|
|
|
+ * the corresponding inode, after log replay, has a
|
|
|
+ * matching link count. For example:
|
|
|
+ *
|
|
|
+ * touch foo
|
|
|
+ * mkdir mydir
|
|
|
+ * sync
|
|
|
+ * ln foo mydir/bar
|
|
|
+ * xfs_io -c "fsync" mydir
|
|
|
+ * <crash>
|
|
|
+ * <mount fs and log replay>
|
|
|
+ *
|
|
|
+ * Would result in a fsync log that when replayed, our
|
|
|
+ * file inode would have a link count of 1, but we get
|
|
|
+ * two directory entries pointing to the same inode.
|
|
|
+ * After removing one of the names, it would not be
|
|
|
+ * possible to remove the other name, which always
|
|
|
+ * resulted in stale file handle errors, and would not
|
|
|
+ * be possible to rmdir the parent directory, since
|
|
|
+ * its i_size could never decrement to the value
|
|
|
+ * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
|
|
|
+ */
|
|
|
+ di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
|
|
|
+ btrfs_dir_item_key_to_cpu(src, di, &tmp);
|
|
|
+ if (ctx &&
|
|
|
+ (btrfs_dir_transid(src, di) == trans->transid ||
|
|
|
+ btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
|
|
|
+ tmp.type != BTRFS_ROOT_ITEM_KEY)
|
|
|
+ ctx->log_new_dentries = true;
|
|
|
}
|
|
|
path->slots[0] = nritems;
|
|
|
|
|
@@ -3279,7 +3321,8 @@ done:
|
|
|
static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
|
|
|
struct btrfs_root *root, struct inode *inode,
|
|
|
struct btrfs_path *path,
|
|
|
- struct btrfs_path *dst_path)
|
|
|
+ struct btrfs_path *dst_path,
|
|
|
+ struct btrfs_log_ctx *ctx)
|
|
|
{
|
|
|
u64 min_key;
|
|
|
u64 max_key;
|
|
@@ -3291,7 +3334,7 @@ again:
|
|
|
max_key = 0;
|
|
|
while (1) {
|
|
|
ret = log_dir_items(trans, root, inode, path,
|
|
|
- dst_path, key_type, min_key,
|
|
|
+ dst_path, key_type, ctx, min_key,
|
|
|
&max_key);
|
|
|
if (ret)
|
|
|
return ret;
|
|
@@ -4067,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
|
|
|
if (ret < 0) {
|
|
|
return ret;
|
|
|
} else if (ret > 0) {
|
|
|
- *size_ret = i_size_read(inode);
|
|
|
+ *size_ret = 0;
|
|
|
} else {
|
|
|
struct btrfs_inode_item *item;
|
|
|
|
|
@@ -4374,15 +4417,18 @@ log_extents:
|
|
|
}
|
|
|
|
|
|
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
|
|
|
- ret = log_directory_changes(trans, root, inode, path, dst_path);
|
|
|
+ ret = log_directory_changes(trans, root, inode, path, dst_path,
|
|
|
+ ctx);
|
|
|
if (ret) {
|
|
|
err = ret;
|
|
|
goto out_unlock;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ spin_lock(&BTRFS_I(inode)->lock);
|
|
|
BTRFS_I(inode)->logged_trans = trans->transid;
|
|
|
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
|
|
|
+ spin_unlock(&BTRFS_I(inode)->lock);
|
|
|
out_unlock:
|
|
|
if (unlikely(err))
|
|
|
btrfs_put_logged_extents(&logged_list);
|
|
@@ -4469,6 +4515,181 @@ out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+struct btrfs_dir_list { /* work-list entry used by log_new_dir_dentries() */
|
|
|
+ u64 ino; /* inode number of a directory whose dentries are still to be scanned */
|
|
|
+ struct list_head list; /* link in the function-local dir_list */
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Log the inodes of the new dentries of a directory. See log_dir_items() for
|
|
|
+ * details about why it is needed.
|
|
|
+ * This is a recursive operation - if an existing dentry corresponds to a
|
|
|
+ * directory, that directory's new entries are logged too (same behaviour as
|
|
|
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
|
|
|
+ * the dentries point to we do not lock their i_mutex, otherwise lockdep
|
|
|
+ * complains about the following circular lock dependency / possible deadlock:
|
|
|
+ *
|
|
|
+ * CPU0 CPU1
|
|
|
+ * ---- ----
|
|
|
+ * lock(&type->i_mutex_dir_key#3/2);
|
|
|
+ * lock(sb_internal#2);
|
|
|
+ * lock(&type->i_mutex_dir_key#3/2);
|
|
|
+ * lock(&sb->s_type->i_mutex_key#14);
|
|
|
+ *
|
|
|
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
|
|
|
+ * sb_start_intwrite() in btrfs_start_transaction().
|
|
|
+ * Not locking i_mutex of the inodes is still safe because:
|
|
|
+ *
|
|
|
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
|
|
|
+ * that while logging the inode new references (names) are added or removed
|
|
|
+ * from the inode, leaving the logged inode item with a link count that does
|
|
|
+ * not match the number of logged inode reference items. This is fine because
|
|
|
+ * at log replay time we compute the real number of links and correct the
|
|
|
+ * link count in the inode item (see replay_one_buffer() and
|
|
|
+ * link_to_fixup_dir());
|
|
|
+ *
|
|
|
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
|
|
|
+ * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
|
|
|
+ * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
|
|
|
+ * has a size that doesn't match the sum of the lengths of all the logged
|
|
|
+ * names. This does not result in a problem because if a dir_item key is
|
|
|
+ * logged but its matching dir_index key is not logged, at log replay time we
|
|
|
+ * don't use it to replay the respective name (see replay_one_name()). On the
|
|
|
+ * other hand if only the dir_index key ends up being logged, the respective
|
|
|
+ * name is added to the fs/subvol tree with both the dir_item and dir_index
|
|
|
+ * keys created (see replay_one_name()).
|
|
|
+ * The directory's inode item with a wrong i_size is not a problem as well,
|
|
|
+ * since we don't use it at log replay time to set the i_size in the inode
|
|
|
+ * item of the fs/subvol tree (see overwrite_item()).
|
|
|
+ */
|
|
|
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
|
|
|
+ struct btrfs_root *root,
|
|
|
+ struct inode *start_inode,
|
|
|
+ struct btrfs_log_ctx *ctx)
|
|
|
+{
|
|
|
+ struct btrfs_root *log = root->log_root; /* all searches below run on the log tree */
|
|
|
+ struct btrfs_path *path;
|
|
|
+ LIST_HEAD(dir_list); /* FIFO work list of directories left to scan */
|
|
|
+ struct btrfs_dir_list *dir_elem;
|
|
|
+ int ret = 0; /* 0 on success, negative errno on the first failure */
|
|
|
+
|
|
|
+ path = btrfs_alloc_path();
|
|
|
+ if (!path)
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
|
|
|
+ if (!dir_elem) {
|
|
|
+ btrfs_free_path(path);
|
|
|
+ return -ENOMEM;
|
|
|
+ }
|
|
|
+ dir_elem->ino = btrfs_ino(start_inode); /* seed the work list with the starting directory */
|
|
|
+ list_add_tail(&dir_elem->list, &dir_list);
|
|
|
+
|
|
|
+ while (!list_empty(&dir_list)) {
|
|
|
+ struct extent_buffer *leaf;
|
|
|
+ struct btrfs_key min_key;
|
|
|
+ int nritems;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
|
|
|
+ list);
|
|
|
+ if (ret) /* after an error, only unlink and free the remaining entries */
|
|
|
+ goto next_dir_inode;
|
|
|
+
|
|
|
+ min_key.objectid = dir_elem->ino; /* start at this directory's first DIR_ITEM key */
|
|
|
+ min_key.type = BTRFS_DIR_ITEM_KEY;
|
|
|
+ min_key.offset = 0;
|
|
|
+again:
|
|
|
+ btrfs_release_path(path); /* drop any leaf still held from a previous pass */
|
|
|
+ ret = btrfs_search_forward(log, &min_key, path, trans->transid);
|
|
|
+ if (ret < 0) {
|
|
|
+ goto next_dir_inode;
|
|
|
+ } else if (ret > 0) { /* positive return is not an error: done with this directory */
|
|
|
+ ret = 0;
|
|
|
+ goto next_dir_inode;
|
|
|
+ }
|
|
|
+
|
|
|
+process_leaf:
|
|
|
+ leaf = path->nodes[0];
|
|
|
+ nritems = btrfs_header_nritems(leaf);
|
|
|
+ for (i = path->slots[0]; i < nritems; i++) {
|
|
|
+ struct btrfs_dir_item *di;
|
|
|
+ struct btrfs_key di_key;
|
|
|
+ struct inode *di_inode;
|
|
|
+ struct btrfs_dir_list *new_dir_elem;
|
|
|
+ int log_mode = LOG_INODE_EXISTS; /* default; upgraded to LOG_INODE_ALL for directories */
|
|
|
+ int type;
|
|
|
+
|
|
|
+ btrfs_item_key_to_cpu(leaf, &min_key, i); /* min_key now tracks the current item */
|
|
|
+ if (min_key.objectid != dir_elem->ino ||
|
|
|
+ min_key.type != BTRFS_DIR_ITEM_KEY)
|
|
|
+ goto next_dir_inode; /* past this directory's DIR_ITEM keys */
|
|
|
+
|
|
|
+ di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
|
|
|
+ type = btrfs_dir_type(leaf, di);
|
|
|
+ if (btrfs_dir_transid(leaf, di) < trans->transid &&
|
|
|
+ type != BTRFS_FT_DIR)
|
|
|
+ continue; /* non-directory entry from an older transaction: skip */
|
|
|
+ btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
|
|
|
+ if (di_key.type == BTRFS_ROOT_ITEM_KEY)
|
|
|
+ continue; /* entry's location key is a root item, not an inode: skip */
|
|
|
+
|
|
|
+ di_inode = btrfs_iget(root->fs_info->sb, &di_key,
|
|
|
+ root, NULL);
|
|
|
+ if (IS_ERR(di_inode)) {
|
|
|
+ ret = PTR_ERR(di_inode);
|
|
|
+ goto next_dir_inode;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (btrfs_inode_in_log(di_inode, trans->transid)) { /* NOTE(review): presumably already logged in this transaction - confirm */
|
|
|
+ iput(di_inode);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ ctx->log_new_dentries = false; /* reset so the flag reflects only the btrfs_log_inode() call below */
|
|
|
+ if (type == BTRFS_FT_DIR)
|
|
|
+ log_mode = LOG_INODE_ALL;
|
|
|
+ btrfs_release_path(path); /* leaf is not used past this point; the loop breaks and re-searches */
|
|
|
+ ret = btrfs_log_inode(trans, root, di_inode,
|
|
|
+ log_mode, 0, LLONG_MAX, ctx);
|
|
|
+ iput(di_inode);
|
|
|
+ if (ret)
|
|
|
+ goto next_dir_inode;
|
|
|
+ if (ctx->log_new_dentries) { /* flag was set while logging di_inode: queue that directory too */
|
|
|
+ new_dir_elem = kmalloc(sizeof(*new_dir_elem),
|
|
|
+ GFP_NOFS);
|
|
|
+ if (!new_dir_elem) {
|
|
|
+ ret = -ENOMEM;
|
|
|
+ goto next_dir_inode;
|
|
|
+ }
|
|
|
+ new_dir_elem->ino = di_key.objectid;
|
|
|
+ list_add_tail(&new_dir_elem->list, &dir_list);
|
|
|
+ }
|
|
|
+ break; /* path was released above, so resume through a fresh search */
|
|
|
+ }
|
|
|
+ if (i == nritems) { /* leaf exhausted without logging an inode: move to the next leaf */
|
|
|
+ ret = btrfs_next_leaf(log, path);
|
|
|
+ if (ret < 0) {
|
|
|
+ goto next_dir_inode;
|
|
|
+ } else if (ret > 0) { /* positive return is not an error: done with this directory */
|
|
|
+ ret = 0;
|
|
|
+ goto next_dir_inode;
|
|
|
+ }
|
|
|
+ goto process_leaf;
|
|
|
+ }
|
|
|
+ if (min_key.offset < (u64)-1) { /* restart the search just past the key handled last */
|
|
|
+ min_key.offset++;
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
+next_dir_inode: /* finished (or failed) with this directory: unlink and free its entry */
|
|
|
+ list_del(&dir_elem->list);
|
|
|
+ kfree(dir_elem);
|
|
|
+ }
|
|
|
+
|
|
|
+ btrfs_free_path(path);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* helper function around btrfs_log_inode to make sure newly created
|
|
|
* parent directories also end up in the log. A minimal inode and backref
|
|
@@ -4491,6 +4712,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
|
|
const struct dentry * const first_parent = parent;
|
|
|
const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
|
|
|
last_committed);
|
|
|
+ bool log_dentries = false;
|
|
|
+ struct inode *orig_inode = inode;
|
|
|
|
|
|
sb = inode->i_sb;
|
|
|
|
|
@@ -4546,6 +4769,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
|
|
goto end_trans;
|
|
|
}
|
|
|
|
|
|
+ if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
|
|
|
+ log_dentries = true;
|
|
|
+
|
|
|
while (1) {
|
|
|
if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
|
|
|
break;
|
|
@@ -4582,7 +4808,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
|
|
dput(old_parent);
|
|
|
old_parent = parent;
|
|
|
}
|
|
|
- ret = 0;
|
|
|
+ if (log_dentries)
|
|
|
+ ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
|
|
|
+ else
|
|
|
+ ret = 0;
|
|
|
end_trans:
|
|
|
dput(old_parent);
|
|
|
if (ret < 0) {
|