|
@@ -4415,6 +4415,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * When we are logging a new inode X, check if it doesn't have a reference that
|
|
|
+ * matches the reference from some other inode Y created in a past transaction
|
|
|
+ * and that was renamed in the current transaction. If we don't do this, then at
|
|
|
+ * log replay time we can lose inode Y (and all its files if it's a directory):
|
|
|
+ *
|
|
|
+ * mkdir /mnt/x
|
|
|
+ * echo "hello world" > /mnt/x/foobar
|
|
|
+ * sync
|
|
|
+ * mv /mnt/x /mnt/y
|
|
|
+ * mkdir /mnt/x # or touch /mnt/x
|
|
|
+ * xfs_io -c fsync /mnt/x
|
|
|
+ * <power fail>
|
|
|
+ * mount fs, trigger log replay
|
|
|
+ *
|
|
|
+ * After the log replay procedure, we would lose the first directory and all its
|
|
|
+ * files (file foobar).
|
|
|
+ * For the case where inode Y is not a directory we simply end up losing it:
|
|
|
+ *
|
|
|
+ * echo "123" > /mnt/foo
|
|
|
+ * sync
|
|
|
+ * mv /mnt/foo /mnt/bar
|
|
|
+ * echo "abc" > /mnt/foo
|
|
|
+ * xfs_io -c fsync /mnt/foo
|
|
|
+ * <power fail>
|
|
|
+ *
|
|
|
+ * We also need this for cases where a snapshot entry is replaced by some other
|
|
|
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
|
|
|
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
|
|
|
+ * if it were a regular entry:
|
|
|
+ *
|
|
|
+ * mkdir /mnt/x
|
|
|
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
|
|
|
+ * btrfs subvolume delete /mnt/x/snap
|
|
|
+ * rmdir /mnt/x
|
|
|
+ * mkdir /mnt/x
|
|
|
+ * fsync /mnt/x or fsync some new file inside it
|
|
|
+ * <power fail>
|
|
|
+ *
|
|
|
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
|
|
|
+ * the same transaction.
|
|
|
+ */
|
|
|
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
|
|
|
+ const int slot,
|
|
|
+ const struct btrfs_key *key,
|
|
|
+ struct inode *inode)
|
|
|
+{
|
|
|
+ int ret;
|
|
|
+ struct btrfs_path *search_path;
|
|
|
+ char *name = NULL;
|
|
|
+ u32 name_len = 0;
|
|
|
+ u32 item_size = btrfs_item_size_nr(eb, slot);
|
|
|
+ u32 cur_offset = 0;
|
|
|
+ unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
|
|
|
+
|
|
|
+ search_path = btrfs_alloc_path();
|
|
|
+ if (!search_path)
|
|
|
+ return -ENOMEM;
|
|
|
+ search_path->search_commit_root = 1;
|
|
|
+ search_path->skip_locking = 1;
|
|
|
+
|
|
|
+ while (cur_offset < item_size) {
|
|
|
+ u64 parent;
|
|
|
+ u32 this_name_len;
|
|
|
+ u32 this_len;
|
|
|
+ unsigned long name_ptr;
|
|
|
+ struct btrfs_dir_item *di;
|
|
|
+
|
|
|
+ if (key->type == BTRFS_INODE_REF_KEY) {
|
|
|
+ struct btrfs_inode_ref *iref;
|
|
|
+
|
|
|
+ iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
|
|
|
+ parent = key->offset;
|
|
|
+ this_name_len = btrfs_inode_ref_name_len(eb, iref);
|
|
|
+ name_ptr = (unsigned long)(iref + 1);
|
|
|
+ this_len = sizeof(*iref) + this_name_len;
|
|
|
+ } else {
|
|
|
+ struct btrfs_inode_extref *extref;
|
|
|
+
|
|
|
+ extref = (struct btrfs_inode_extref *)(ptr +
|
|
|
+ cur_offset);
|
|
|
+ parent = btrfs_inode_extref_parent(eb, extref);
|
|
|
+ this_name_len = btrfs_inode_extref_name_len(eb, extref);
|
|
|
+ name_ptr = (unsigned long)&extref->name;
|
|
|
+ this_len = sizeof(*extref) + this_name_len;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (this_name_len > name_len) {
|
|
|
+ char *new_name;
|
|
|
+
|
|
|
+ new_name = krealloc(name, this_name_len, GFP_NOFS);
|
|
|
+ if (!new_name) {
|
|
|
+ ret = -ENOMEM;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ name_len = this_name_len;
|
|
|
+ name = new_name;
|
|
|
+ }
|
|
|
+
|
|
|
+ read_extent_buffer(eb, name, name_ptr, this_name_len);
|
|
|
+ di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
|
|
|
+ search_path, parent,
|
|
|
+ name, this_name_len, 0);
|
|
|
+ if (di && !IS_ERR(di)) {
|
|
|
+ ret = 1;
|
|
|
+ goto out;
|
|
|
+ } else if (IS_ERR(di)) {
|
|
|
+ ret = PTR_ERR(di);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ btrfs_release_path(search_path);
|
|
|
+
|
|
|
+ cur_offset += this_len;
|
|
|
+ }
|
|
|
+ ret = 0;
|
|
|
+out:
|
|
|
+ btrfs_free_path(search_path);
|
|
|
+ kfree(name);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
/* log a single inode in the tree log.
|
|
|
* At least one parent directory for this inode must exist in the tree
|
|
|
* or be logged already.
|
|
@@ -4602,6 +4723,22 @@ again:
|
|
|
if (min_key.type == BTRFS_INODE_ITEM_KEY)
|
|
|
need_log_inode_item = false;
|
|
|
|
|
|
+ if ((min_key.type == BTRFS_INODE_REF_KEY ||
|
|
|
+ min_key.type == BTRFS_INODE_EXTREF_KEY) &&
|
|
|
+ BTRFS_I(inode)->generation == trans->transid) {
|
|
|
+ ret = btrfs_check_ref_name_override(path->nodes[0],
|
|
|
+ path->slots[0],
|
|
|
+ &min_key, inode);
|
|
|
+ if (ret < 0) {
|
|
|
+ err = ret;
|
|
|
+ goto out_unlock;
|
|
|
+ } else if (ret > 0) {
|
|
|
+ err = 1;
|
|
|
+ btrfs_set_log_full_commit(root->fs_info, trans);
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
|
|
|
if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
|
|
|
if (ins_nr == 0)
|