|
@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
|
|
|
SetPageUptodate(page);
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * When IO fails, either with EIO or csum verification fails, we
|
|
|
- * try other mirrors that might have a good copy of the data. This
|
|
|
- * io_failure_record is used to record state as we go through all the
|
|
|
- * mirrors. If another mirror has good data, the page is set up to date
|
|
|
- * and things continue. If a good mirror can't be found, the original
|
|
|
- * bio end_io callback is called to indicate things have failed.
|
|
|
- */
|
|
|
-struct io_failure_record {
|
|
|
- struct page *page;
|
|
|
- u64 start;
|
|
|
- u64 len;
|
|
|
- u64 logical;
|
|
|
- unsigned long bio_flags;
|
|
|
- int this_mirror;
|
|
|
- int failed_mirror;
|
|
|
- int in_validation;
|
|
|
-};
|
|
|
-
|
|
|
static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
|
|
|
{
|
|
|
int ret;
|
|
@@ -2156,40 +2137,24 @@ out:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * this is a generic handler for readpage errors (default
|
|
|
- * readpage_io_failed_hook). if other copies exist, read those and write back
|
|
|
- * good data to the failed position. does not investigate in remapping the
|
|
|
- * failed extent elsewhere, hoping the device will be smart enough to do this as
|
|
|
- * needed
|
|
|
- */
|
|
|
-
|
|
|
-static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
- struct page *page, u64 start, u64 end,
|
|
|
- int failed_mirror)
|
|
|
+int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
|
|
|
+ struct io_failure_record **failrec_ret)
|
|
|
{
|
|
|
- struct io_failure_record *failrec = NULL;
|
|
|
+ struct io_failure_record *failrec;
|
|
|
u64 private;
|
|
|
struct extent_map *em;
|
|
|
- struct inode *inode = page->mapping->host;
|
|
|
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
|
|
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
|
|
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
|
|
|
- struct bio *bio;
|
|
|
- struct btrfs_io_bio *btrfs_failed_bio;
|
|
|
- struct btrfs_io_bio *btrfs_bio;
|
|
|
- int num_copies;
|
|
|
int ret;
|
|
|
- int read_mode;
|
|
|
u64 logical;
|
|
|
|
|
|
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
|
|
|
-
|
|
|
ret = get_state_private(failure_tree, start, &private);
|
|
|
if (ret) {
|
|
|
failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
|
|
|
if (!failrec)
|
|
|
return -ENOMEM;
|
|
|
+
|
|
|
failrec->start = start;
|
|
|
failrec->len = end - start + 1;
|
|
|
failrec->this_mirror = 0;
|
|
@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
em = NULL;
|
|
|
}
|
|
|
read_unlock(&em_tree->lock);
|
|
|
-
|
|
|
if (!em) {
|
|
|
kfree(failrec);
|
|
|
return -EIO;
|
|
|
}
|
|
|
+
|
|
|
logical = start - em->start;
|
|
|
logical = em->block_start + logical;
|
|
|
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
|
|
@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
extent_set_compress_type(&failrec->bio_flags,
|
|
|
em->compress_type);
|
|
|
}
|
|
|
- pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
|
|
|
- "len=%llu\n", logical, start, failrec->len);
|
|
|
+
|
|
|
+ pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
|
|
|
+ logical, start, failrec->len);
|
|
|
+
|
|
|
failrec->logical = logical;
|
|
|
free_extent_map(em);
|
|
|
|
|
@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
}
|
|
|
} else {
|
|
|
failrec = (struct io_failure_record *)(unsigned long)private;
|
|
|
- pr_debug("bio_readpage_error: (found) logical=%llu, "
|
|
|
- "start=%llu, len=%llu, validation=%d\n",
|
|
|
+ pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
|
|
|
failrec->logical, failrec->start, failrec->len,
|
|
|
failrec->in_validation);
|
|
|
/*
|
|
@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
* clean_io_failure() clean all those errors at once.
|
|
|
*/
|
|
|
}
|
|
|
+
|
|
|
+ *failrec_ret = failrec;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
|
|
|
+ struct io_failure_record *failrec, int failed_mirror)
|
|
|
+{
|
|
|
+ int num_copies;
|
|
|
+
|
|
|
num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
|
|
|
failrec->logical, failrec->len);
|
|
|
if (num_copies == 1) {
|
|
@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
* all the retry and error correction code that follows. no
|
|
|
* matter what the error is, it is very likely to persist.
|
|
|
*/
|
|
|
- pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
+ pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
num_copies, failrec->this_mirror, failed_mirror);
|
|
|
- free_io_failure(inode, failrec);
|
|
|
- return -EIO;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
BUG_ON(failrec->in_validation);
|
|
|
failrec->in_validation = 1;
|
|
|
failrec->this_mirror = failed_mirror;
|
|
|
- read_mode = READ_SYNC | REQ_FAILFAST_DEV;
|
|
|
} else {
|
|
|
/*
|
|
|
* we're ready to fulfill a) and b) alongside. get a good copy
|
|
@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
failrec->this_mirror++;
|
|
|
if (failrec->this_mirror == failed_mirror)
|
|
|
failrec->this_mirror++;
|
|
|
- read_mode = READ_SYNC;
|
|
|
}
|
|
|
|
|
|
if (failrec->this_mirror > num_copies) {
|
|
|
- pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
+ pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
num_copies, failrec->this_mirror, failed_mirror);
|
|
|
- free_io_failure(inode, failrec);
|
|
|
- return -EIO;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
+ return 1;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
|
|
|
+ struct io_failure_record *failrec,
|
|
|
+ struct page *page, int pg_offset, int icsum,
|
|
|
+ bio_end_io_t *endio_func)
|
|
|
+{
|
|
|
+ struct bio *bio;
|
|
|
+ struct btrfs_io_bio *btrfs_failed_bio;
|
|
|
+ struct btrfs_io_bio *btrfs_bio;
|
|
|
+
|
|
|
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
|
|
|
- if (!bio) {
|
|
|
- free_io_failure(inode, failrec);
|
|
|
- return -EIO;
|
|
|
- }
|
|
|
- bio->bi_end_io = failed_bio->bi_end_io;
|
|
|
+ if (!bio)
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ bio->bi_end_io = endio_func;
|
|
|
bio->bi_iter.bi_sector = failrec->logical >> 9;
|
|
|
bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
|
|
|
bio->bi_iter.bi_size = 0;
|
|
@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
|
|
|
btrfs_bio = btrfs_io_bio(bio);
|
|
|
btrfs_bio->csum = btrfs_bio->csum_inline;
|
|
|
- phy_offset >>= inode->i_sb->s_blocksize_bits;
|
|
|
- phy_offset *= csum_size;
|
|
|
- memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
|
|
|
+ icsum *= csum_size;
|
|
|
+ memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
|
|
|
csum_size);
|
|
|
}
|
|
|
|
|
|
- bio_add_page(bio, page, failrec->len, start - page_offset(page));
|
|
|
+ bio_add_page(bio, page, failrec->len, pg_offset);
|
|
|
+
|
|
|
+ return bio;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * this is a generic handler for readpage errors (default
|
|
|
+ * readpage_io_failed_hook). if other copies exist, read those and write back
|
|
|
+ * good data to the failed position. does not investigate in remapping the
|
|
|
+ * failed extent elsewhere, hoping the device will be smart enough to do this as
|
|
|
+ * needed
|
|
|
+ */
|
|
|
+
|
|
|
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
|
+ struct page *page, u64 start, u64 end,
|
|
|
+ int failed_mirror)
|
|
|
+{
|
|
|
+ struct io_failure_record *failrec;
|
|
|
+ struct inode *inode = page->mapping->host;
|
|
|
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
|
|
+ struct bio *bio;
|
|
|
+ int read_mode;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ BUG_ON(failed_bio->bi_rw & REQ_WRITE);
|
|
|
+
|
|
|
+ ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
|
|
|
+ if (!ret) {
|
|
|
+ free_io_failure(inode, failrec);
|
|
|
+ return -EIO;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (failed_bio->bi_vcnt > 1)
|
|
|
+ read_mode = READ_SYNC | REQ_FAILFAST_DEV;
|
|
|
+ else
|
|
|
+ read_mode = READ_SYNC;
|
|
|
+
|
|
|
+ phy_offset >>= inode->i_sb->s_blocksize_bits;
|
|
|
+ bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
|
|
|
+ start - page_offset(page),
|
|
|
+ (int)phy_offset, failed_bio->bi_end_io);
|
|
|
+ if (!bio) {
|
|
|
+ free_io_failure(inode, failrec);
|
|
|
+ return -EIO;
|
|
|
+ }
|
|
|
|
|
|
- pr_debug("bio_readpage_error: submitting new read[%#x] to "
|
|
|
- "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
|
|
|
- failrec->this_mirror, num_copies, failrec->in_validation);
|
|
|
+ pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
|
|
|
+ read_mode, failrec->this_mirror, failrec->in_validation);
|
|
|
|
|
|
ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
|
|
|
failrec->this_mirror,
|