9 years ago · 56f23fdbb6
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4415,6 +4415,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * When we are logging a new inode X, check if it doesn't have a reference that
			
 
				+ * matches the reference from some other inode Y created in a past transaction
			
 
				+ * and that was renamed in the current transaction. If we don't do this, then at
			
 
				+ * log replay time we can lose inode Y (and all its files if it's a directory):
			
 
				+ *
			
 
				+ * mkdir /mnt/x
			
 
				+ * echo "hello world" > /mnt/x/foobar
			
 
				+ * sync
			
 
				+ * mv /mnt/x /mnt/y
			
 
				+ * mkdir /mnt/x                 # or touch /mnt/x
			
 
				+ * xfs_io -c fsync /mnt/x
			
 
				+ * <power fail>
			
 
				+ * mount fs, trigger log replay
			
 
				+ *
			
 
				+ * After the log replay procedure, we would lose the first directory and all its
			
 
				+ * files (file foobar).
			
 
				+ * For the case where inode Y is not a directory we simply end up losing it:
			
 
				+ *
			
 
				+ * echo "123" > /mnt/foo
			
 
				+ * sync
			
 
				+ * mv /mnt/foo /mnt/bar
			
 
				+ * echo "abc" > /mnt/foo
			
 
				+ * xfs_io -c fsync /mnt/foo
			
 
				+ * <power fail>
			
 
				+ *
			
 
				+ * We also need this for cases where a snapshot entry is replaced by some other
			
 
				+ * entry (file or directory) otherwise we end up with an unreplayable log due to
			
 
				+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
			
 
				+ * if it were a regular entry:
			
 
				+ *
			
 
				+ * mkdir /mnt/x
			
 
				+ * btrfs subvolume snapshot /mnt /mnt/x/snap
			
 
				+ * btrfs subvolume delete /mnt/x/snap
			
 
				+ * rmdir /mnt/x
			
 
				+ * mkdir /mnt/x
			
 
				+ * fsync /mnt/x or fsync some new file inside it
			
 
				+ * <power fail>
			
 
				+ *
			
 
				+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
			
 
				+ * the same transaction.
			
 
				+ */
			
 
				+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
			
 
				+					 const int slot,
			
 
				+					 const struct btrfs_key *key,
			
 
				+					 struct inode *inode)
			
 
				+{
			
 
				+	int ret;
			
 
				+	struct btrfs_path *search_path;
			
 
				+	char *name = NULL;
			
 
				+	u32 name_len = 0;
			
 
				+	u32 item_size = btrfs_item_size_nr(eb, slot);
			
 
				+	u32 cur_offset = 0;
			
 
				+	unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
			
 
				+
			
 
				+	search_path = btrfs_alloc_path();
			
 
				+	if (!search_path)
			
 
				+		return -ENOMEM;
			
 
				+	search_path->search_commit_root = 1;
			
 
				+	search_path->skip_locking = 1;
			
 
				+
			
 
				+	while (cur_offset < item_size) {
			
 
				+		u64 parent;
			
 
				+		u32 this_name_len;
			
 
				+		u32 this_len;
			
 
				+		unsigned long name_ptr;
			
 
				+		struct btrfs_dir_item *di;
			
 
				+
			
 
				+		if (key->type == BTRFS_INODE_REF_KEY) {
			
 
				+			struct btrfs_inode_ref *iref;
			
 
				+
			
 
				+			iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
			
 
				+			parent = key->offset;
			
 
				+			this_name_len = btrfs_inode_ref_name_len(eb, iref);
			
 
				+			name_ptr = (unsigned long)(iref + 1);
			
 
				+			this_len = sizeof(*iref) + this_name_len;
			
 
				+		} else {
			
 
				+			struct btrfs_inode_extref *extref;
			
 
				+
			
 
				+			extref = (struct btrfs_inode_extref *)(ptr +
			
 
				+							       cur_offset);
			
 
				+			parent = btrfs_inode_extref_parent(eb, extref);
			
 
				+			this_name_len = btrfs_inode_extref_name_len(eb, extref);
			
 
				+			name_ptr = (unsigned long)&extref->name;
			
 
				+			this_len = sizeof(*extref) + this_name_len;
			
 
				+		}
			
 
				+
			
 
				+		if (this_name_len > name_len) {
			
 
				+			char *new_name;
			
 
				+
			
 
				+			new_name = krealloc(name, this_name_len, GFP_NOFS);
			
 
				+			if (!new_name) {
			
 
				+				ret = -ENOMEM;
			
 
				+				goto out;
			
 
				+			}
			
 
				+			name_len = this_name_len;
			
 
				+			name = new_name;
			
 
				+		}
			
 
				+
			
 
				+		read_extent_buffer(eb, name, name_ptr, this_name_len);
			
 
				+		di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
			
 
				+					   search_path, parent,
			
 
				+					   name, this_name_len, 0);
			
 
				+		if (di && !IS_ERR(di)) {
			
 
				+			ret = 1;
			
 
				+			goto out;
			
 
				+		} else if (IS_ERR(di)) {
			
 
				+			ret = PTR_ERR(di);
			
 
				+			goto out;
			
 
				+		}
			
 
				+		btrfs_release_path(search_path);
			
 
				+
			
 
				+		cur_offset += this_len;
			
 
				+	}
			
 
				+	ret = 0;
			
 
				+out:
			
 
				+	btrfs_free_path(search_path);
			
 
				+	kfree(name);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 /* log a single inode in the tree log.
			
 
				  * At least one parent directory for this inode must exist in the tree
			
 
				  * or be logged already.
			
@@ -4602,6 +4723,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
				 		if (min_key.type == BTRFS_INODE_ITEM_KEY)
			
 
				 			need_log_inode_item = false;
			
 
				 
			
 
				+		if ((min_key.type == BTRFS_INODE_REF_KEY ||
			
 
				+		     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
			
 
				+		    BTRFS_I(inode)->generation == trans->transid) {
			
 
				+			ret = btrfs_check_ref_name_override(path->nodes[0],
			
 
				+							    path->slots[0],
			
 
				+							    &min_key, inode);
			
 
				+			if (ret < 0) {
			
 
				+				err = ret;
			
 
				+				goto out_unlock;
			
 
				+			} else if (ret > 0) {
			
 
				+				err = 1;
			
 
				+				btrfs_set_log_full_commit(root->fs_info, trans);
			
 
				+				goto out_unlock;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				 		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
			
 
				 		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
			
 
				 			if (ins_nr == 0)