|
@@ -3654,6 +3654,35 @@ cache_index:
|
|
|
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
|
|
&BTRFS_I(inode)->runtime_flags);
|
|
|
|
|
|
+ /*
|
|
|
+ * We don't persist the id of the transaction where an unlink operation
|
|
|
+ * against the inode was last made. So here we assume the inode might
|
|
|
+ * have been evicted, and therefore the exact value of last_unlink_trans
|
|
|
+ * lost, and set it to last_trans to avoid metadata inconsistencies
|
|
|
+ * between the inode and its parent if the inode is fsync'ed and the log
|
|
|
+ * replayed. For example, in the scenario:
|
|
|
+ *
|
|
|
+ * touch mydir/foo
|
|
|
+ * ln mydir/foo mydir/bar
|
|
|
+ * sync
|
|
|
+ * unlink mydir/bar
|
|
|
+ * echo 2 > /proc/sys/vm/drop_caches # evicts inode
|
|
|
+ * xfs_io -c fsync mydir/foo
|
|
|
+ * <power failure>
|
|
|
+ * mount fs, triggers fsync log replay
|
|
|
+ *
|
|
|
+ * We must make sure that when we fsync our inode foo we also log its
|
|
|
+ * parent inode, otherwise after log replay the parent still has the
|
|
|
+ * dentry with the "bar" name but our inode foo has a link count of 1
|
|
|
+ * and doesn't have an inode ref with the name "bar" anymore.
|
|
|
+ *
|
|
|
+ * Setting last_unlink_trans to last_trans is a pessimistic approach,
|
|
|
+ * but it guarantees correctness at the expense of ocassional full
|
|
|
+ * transaction commits on fsync if our inode is a directory, or if our
|
|
|
+ * inode is not a directory, logging its parent unnecessarily.
|
|
|
+ */
|
|
|
+ BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
|
|
|
+
|
|
|
path->slots[0]++;
|
|
|
if (inode->i_nlink != 1 ||
|
|
|
path->slots[0] >= btrfs_header_nritems(leaf))
|