|
@@ -7242,30 +7242,267 @@ unlock_err:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static int btrfs_subio_endio_read(struct inode *inode,
|
|
|
- struct btrfs_io_bio *io_bio)
|
|
|
+static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
|
|
|
+ int rw, int mirror_num)
|
|
|
+{
|
|
|
+ struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ BUG_ON(rw & REQ_WRITE);
|
|
|
+
|
|
|
+ bio_get(bio);
|
|
|
+
|
|
|
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio,
|
|
|
+ BTRFS_WQ_ENDIO_DIO_REPAIR);
|
|
|
+ if (ret)
|
|
|
+ goto err;
|
|
|
+
|
|
|
+ ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
|
|
|
+err:
|
|
|
+ bio_put(bio);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int btrfs_check_dio_repairable(struct inode *inode,
|
|
|
+ struct bio *failed_bio,
|
|
|
+ struct io_failure_record *failrec,
|
|
|
+ int failed_mirror)
|
|
|
+{
|
|
|
+ int num_copies;
|
|
|
+
|
|
|
+ num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
|
|
|
+ failrec->logical, failrec->len);
|
|
|
+ if (num_copies == 1) {
|
|
|
+ /*
|
|
|
+ * we only have a single copy of the data, so don't bother with
|
|
|
+ * all the retry and error correction code that follows. no
|
|
|
+ * matter what the error is, it is very likely to persist.
|
|
|
+ */
|
|
|
+ pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
+ num_copies, failrec->this_mirror, failed_mirror);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ failrec->failed_mirror = failed_mirror;
|
|
|
+ failrec->this_mirror++;
|
|
|
+ if (failrec->this_mirror == failed_mirror)
|
|
|
+ failrec->this_mirror++;
|
|
|
+
|
|
|
+ if (failrec->this_mirror > num_copies) {
|
|
|
+ pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
|
|
|
+ num_copies, failrec->this_mirror, failed_mirror);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 1;
|
|
|
+}
|
|
|
+
|
|
|
+static int dio_read_error(struct inode *inode, struct bio *failed_bio,
|
|
|
+ struct page *page, u64 start, u64 end,
|
|
|
+ int failed_mirror, bio_end_io_t *repair_endio,
|
|
|
+ void *repair_arg)
|
|
|
+{
|
|
|
+ struct io_failure_record *failrec;
|
|
|
+ struct bio *bio;
|
|
|
+ int isector;
|
|
|
+ int read_mode;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ BUG_ON(failed_bio->bi_rw & REQ_WRITE);
|
|
|
+
|
|
|
+ ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
|
|
|
+ failed_mirror);
|
|
|
+ if (!ret) {
|
|
|
+ free_io_failure(inode, failrec);
|
|
|
+ return -EIO;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (failed_bio->bi_vcnt > 1)
|
|
|
+ read_mode = READ_SYNC | REQ_FAILFAST_DEV;
|
|
|
+ else
|
|
|
+ read_mode = READ_SYNC;
|
|
|
+
|
|
|
+ isector = start - btrfs_io_bio(failed_bio)->logical;
|
|
|
+ isector >>= inode->i_sb->s_blocksize_bits;
|
|
|
+ bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
|
|
|
+ 0, isector, repair_endio, repair_arg);
|
|
|
+ if (!bio) {
|
|
|
+ free_io_failure(inode, failrec);
|
|
|
+ return -EIO;
|
|
|
+ }
|
|
|
+
|
|
|
+ btrfs_debug(BTRFS_I(inode)->root->fs_info,
|
|
|
+ "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
|
|
|
+ read_mode, failrec->this_mirror, failrec->in_validation);
|
|
|
+
|
|
|
+ ret = submit_dio_repair_bio(inode, bio, read_mode,
|
|
|
+ failrec->this_mirror);
|
|
|
+ if (ret) {
|
|
|
+ free_io_failure(inode, failrec);
|
|
|
+ bio_put(bio);
|
|
|
+ }
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+struct btrfs_retry_complete {
|
|
|
+ struct completion done;
|
|
|
+ struct inode *inode;
|
|
|
+ u64 start;
|
|
|
+ int uptodate;
|
|
|
+};
|
|
|
+
|
|
|
+static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
|
|
|
+{
|
|
|
+ struct btrfs_retry_complete *done = bio->bi_private;
|
|
|
+ struct bio_vec *bvec;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ if (err)
|
|
|
+ goto end;
|
|
|
+
|
|
|
+ done->uptodate = 1;
|
|
|
+ bio_for_each_segment_all(bvec, bio, i)
|
|
|
+ clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
|
|
|
+end:
|
|
|
+ complete(&done->done);
|
|
|
+ bio_put(bio);
|
|
|
+}
|
|
|
+
|
|
|
+static int __btrfs_correct_data_nocsum(struct inode *inode,
|
|
|
+ struct btrfs_io_bio *io_bio)
|
|
|
{
|
|
|
struct bio_vec *bvec;
|
|
|
+ struct btrfs_retry_complete done;
|
|
|
u64 start;
|
|
|
int i;
|
|
|
int ret;
|
|
|
- int err = 0;
|
|
|
|
|
|
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
|
|
|
- return 0;
|
|
|
+ start = io_bio->logical;
|
|
|
+ done.inode = inode;
|
|
|
+
|
|
|
+ bio_for_each_segment_all(bvec, &io_bio->bio, i) {
|
|
|
+try_again:
|
|
|
+ done.uptodate = 0;
|
|
|
+ done.start = start;
|
|
|
+ init_completion(&done.done);
|
|
|
+
|
|
|
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
|
|
|
+ start + bvec->bv_len - 1,
|
|
|
+ io_bio->mirror_num,
|
|
|
+ btrfs_retry_endio_nocsum, &done);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ wait_for_completion(&done.done);
|
|
|
+
|
|
|
+ if (!done.uptodate) {
|
|
|
+ /* We might have another mirror, so try again */
|
|
|
+ goto try_again;
|
|
|
+ }
|
|
|
+
|
|
|
+ start += bvec->bv_len;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static void btrfs_retry_endio(struct bio *bio, int err)
|
|
|
+{
|
|
|
+ struct btrfs_retry_complete *done = bio->bi_private;
|
|
|
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
|
|
|
+ struct bio_vec *bvec;
|
|
|
+ int uptodate;
|
|
|
+ int ret;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ if (err)
|
|
|
+ goto end;
|
|
|
+
|
|
|
+ uptodate = 1;
|
|
|
+ bio_for_each_segment_all(bvec, bio, i) {
|
|
|
+ ret = __readpage_endio_check(done->inode, io_bio, i,
|
|
|
+ bvec->bv_page, 0,
|
|
|
+ done->start, bvec->bv_len);
|
|
|
+ if (!ret)
|
|
|
+ clean_io_failure(done->inode, done->start,
|
|
|
+ bvec->bv_page, 0);
|
|
|
+ else
|
|
|
+ uptodate = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ done->uptodate = uptodate;
|
|
|
+end:
|
|
|
+ complete(&done->done);
|
|
|
+ bio_put(bio);
|
|
|
+}
|
|
|
|
|
|
+static int __btrfs_subio_endio_read(struct inode *inode,
|
|
|
+ struct btrfs_io_bio *io_bio, int err)
|
|
|
+{
|
|
|
+ struct bio_vec *bvec;
|
|
|
+ struct btrfs_retry_complete done;
|
|
|
+ u64 start;
|
|
|
+ u64 offset = 0;
|
|
|
+ int i;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ err = 0;
|
|
|
start = io_bio->logical;
|
|
|
+ done.inode = inode;
|
|
|
+
|
|
|
bio_for_each_segment_all(bvec, &io_bio->bio, i) {
|
|
|
ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
|
|
|
0, start, bvec->bv_len);
|
|
|
- if (ret)
|
|
|
- err = -EIO;
|
|
|
+ if (likely(!ret))
|
|
|
+ goto next;
|
|
|
+try_again:
|
|
|
+ done.uptodate = 0;
|
|
|
+ done.start = start;
|
|
|
+ init_completion(&done.done);
|
|
|
+
|
|
|
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
|
|
|
+ start + bvec->bv_len - 1,
|
|
|
+ io_bio->mirror_num,
|
|
|
+ btrfs_retry_endio, &done);
|
|
|
+ if (ret) {
|
|
|
+ err = ret;
|
|
|
+ goto next;
|
|
|
+ }
|
|
|
+
|
|
|
+ wait_for_completion(&done.done);
|
|
|
+
|
|
|
+ if (!done.uptodate) {
|
|
|
+ /* We might have another mirror, so try again */
|
|
|
+ goto try_again;
|
|
|
+ }
|
|
|
+next:
|
|
|
+ offset += bvec->bv_len;
|
|
|
start += bvec->bv_len;
|
|
|
}
|
|
|
|
|
|
return err;
|
|
|
}
|
|
|
|
|
|
+static int btrfs_subio_endio_read(struct inode *inode,
|
|
|
+ struct btrfs_io_bio *io_bio, int err)
|
|
|
+{
|
|
|
+ bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
|
|
|
+
|
|
|
+ if (skip_csum) {
|
|
|
+ if (unlikely(err))
|
|
|
+ return __btrfs_correct_data_nocsum(inode, io_bio);
|
|
|
+ else
|
|
|
+ return 0;
|
|
|
+ } else {
|
|
|
+ return __btrfs_subio_endio_read(inode, io_bio, err);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|
|
{
|
|
|
struct btrfs_dio_private *dip = bio->bi_private;
|
|
@@ -7273,8 +7510,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|
|
struct bio *dio_bio;
|
|
|
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
|
|
|
|
|
|
- if (!err && (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED))
|
|
|
- err = btrfs_subio_endio_read(inode, io_bio);
|
|
|
+ if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
|
|
|
+ err = btrfs_subio_endio_read(inode, io_bio, err);
|
|
|
|
|
|
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
|
|
|
dip->logical_offset + dip->bytes - 1);
|
|
@@ -7353,19 +7590,16 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
|
|
|
static void btrfs_end_dio_bio(struct bio *bio, int err)
|
|
|
{
|
|
|
struct btrfs_dio_private *dip = bio->bi_private;
|
|
|
- int ret;
|
|
|
|
|
|
- if (err) {
|
|
|
- btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
|
|
|
- "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
|
|
|
- btrfs_ino(dip->inode), bio->bi_rw,
|
|
|
- (unsigned long long)bio->bi_iter.bi_sector,
|
|
|
- bio->bi_iter.bi_size, err);
|
|
|
- } else if (dip->subio_endio) {
|
|
|
- ret = dip->subio_endio(dip->inode, btrfs_io_bio(bio));
|
|
|
- if (ret)
|
|
|
- err = ret;
|
|
|
- }
|
|
|
+ if (err)
|
|
|
+ btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
|
|
|
+ "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
|
|
|
+ btrfs_ino(dip->inode), bio->bi_rw,
|
|
|
+ (unsigned long long)bio->bi_iter.bi_sector,
|
|
|
+ bio->bi_iter.bi_size, err);
|
|
|
+
|
|
|
+ if (dip->subio_endio)
|
|
|
+ err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
|
|
|
|
|
|
if (err) {
|
|
|
dip->errors = 1;
|