|
@@ -722,11 +722,65 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
|
&ordered_sums, 0);
|
|
&ordered_sums, 0);
|
|
if (ret)
|
|
if (ret)
|
|
goto out;
|
|
goto out;
|
|
|
|
+ /*
|
|
|
|
+ * Now delete all existing csums in the csum root that
|
|
|
|
+ * cover our range. We do this because we can have an
|
|
|
|
+ * extent that is completely referenced by one file
|
|
|
|
+ * extent item and partially referenced by another
|
|
|
|
+ * file extent item (like after using the clone or
|
|
|
|
+ * extent_same ioctls). In this case if we end up doing
|
|
|
|
+ * the replay of the one that partially references the
|
|
|
|
+ * extent first, and we do not do the csum deletion
|
|
|
|
+ * below, we can get 2 csum items in the csum tree that
|
|
|
|
+ * overlap each other. For example, imagine our log has
|
|
|
|
+ * the two following file extent items:
|
|
|
|
+ *
|
|
|
|
+ * key (257 EXTENT_DATA 409600)
|
|
|
|
+ * extent data disk byte 12845056 nr 102400
|
|
|
|
+ * extent data offset 20480 nr 20480 ram 102400
|
|
|
|
+ *
|
|
|
|
+ * key (257 EXTENT_DATA 819200)
|
|
|
|
+ * extent data disk byte 12845056 nr 102400
|
|
|
|
+ * extent data offset 0 nr 102400 ram 102400
|
|
|
|
+ *
|
|
|
|
+ * Where the second one fully references the 100K extent
|
|
|
|
+ * that starts at disk byte 12845056, and the log tree
|
|
|
|
+ * has a single csum item that covers the entire range
|
|
|
|
+ * of the extent:
|
|
|
|
+ *
|
|
|
|
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
|
|
|
|
+ *
|
|
|
|
+ * After the first file extent item is replayed, the
|
|
|
|
+ * csum tree gets the following csum item:
|
|
|
|
+ *
|
|
|
|
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
|
|
|
|
+ *
|
|
|
|
+ * Which covers the 20K sub-range starting at offset 20K
|
|
|
|
+ * of our extent. Now when we replay the second file
|
|
|
|
+ * extent item, if we do not delete existing csum items
|
|
|
|
+ * that cover any of its blocks, we end up getting two
|
|
|
|
+ * csum items in our csum tree that overlap each other:
|
|
|
|
+ *
|
|
|
|
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
|
|
|
|
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
|
|
|
|
+ *
|
|
|
|
+ * Which is a problem, because after this anyone trying
|
|
|
|
+ * to look up the checksum of any block of our
|
|
|
|
+ * extent starting at an offset of 40K or higher will
|
|
|
|
+ * end up looking at the second csum item only, which
|
|
|
|
+ * does not contain the checksum for any block starting
|
|
|
|
+ * at offset 40K or higher of our extent.
|
|
|
|
+ */
|
|
while (!list_empty(&ordered_sums)) {
|
|
while (!list_empty(&ordered_sums)) {
|
|
struct btrfs_ordered_sum *sums;
|
|
struct btrfs_ordered_sum *sums;
|
|
sums = list_entry(ordered_sums.next,
|
|
sums = list_entry(ordered_sums.next,
|
|
struct btrfs_ordered_sum,
|
|
struct btrfs_ordered_sum,
|
|
list);
|
|
list);
|
|
|
|
+ if (!ret)
|
|
|
|
+ ret = btrfs_del_csums(trans,
|
|
|
|
+ root->fs_info->csum_root,
|
|
|
|
+ sums->bytenr,
|
|
|
|
+ sums->len);
|
|
if (!ret)
|
|
if (!ret)
|
|
ret = btrfs_csum_file_blocks(trans,
|
|
ret = btrfs_csum_file_blocks(trans,
|
|
root->fs_info->csum_root,
|
|
root->fs_info->csum_root,
|