@@ -63,10 +63,18 @@ struct scrub_ctx;
  */
 #define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
+struct scrub_recover {
+	atomic_t		refs;
+	struct btrfs_bio	*bbio;
+	u64			*raid_map;
+	u64			map_length;
+};
+
 struct scrub_page {
 	struct scrub_block	*sblock;
 	struct page		*page;
 	struct btrfs_device	*dev;
+	struct list_head	list;
 	u64			flags;  /* extent flags */
 	u64			generation;
 	u64			logical;
@@ -79,6 +87,8 @@ struct scrub_page {
 		unsigned int	io_error:1;
 	};
 	u8			csum[BTRFS_CSUM_SIZE];
+
+	struct scrub_recover	*recover;
 };
 
 struct scrub_bio {
@@ -105,14 +115,52 @@ struct scrub_block {
 	atomic_t		outstanding_pages;
 	atomic_t		ref_count; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
+	struct scrub_parity	*sparity;
 	struct {
 		unsigned int	header_error:1;
 		unsigned int	checksum_error:1;
 		unsigned int	no_io_error_seen:1;
 		unsigned int	generation_error:1; /* also sets header_error */
+
+		/* The following is for the data used to check parity */
+		/* It is for the data with checksum */
+		unsigned int	data_corrected:1;
 	};
 };
 
+/* Used for the chunks with parity stripe such RAID5/6 */
+struct scrub_parity {
+	struct scrub_ctx	*sctx;
+
+	struct btrfs_device	*scrub_dev;
+
+	u64			logic_start;
+
+	u64			logic_end;
+
+	int			nsectors;
+
+	int			stripe_len;
+
+	atomic_t		ref_count;
+
+	struct list_head	spages;
+
+	/* Work of parity check and repair */
+	struct btrfs_work	work;
+
+	/* Mark the parity blocks which have data */
+	unsigned long		*dbitmap;
+
+	/*
+	 * Mark the parity blocks which have data, but errors happen when
+	 * read data or check data
+	 */
+	unsigned long		*ebitmap;
+
+	unsigned long		bitmap[0];
+};
+
 struct scrub_wr_ctx {
 	struct scrub_bio *wr_curr_bio;
 	struct btrfs_device *tgtdev;
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 				struct scrub_block *sblock, int is_metadata,
 				int have_csum, u8 *csum, u64 generation,
-				u16 csum_size);
+				u16 csum_size, int retry_failed_mirror);
 static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 					 struct scrub_block *sblock,
 					 int is_metadata, int have_csum,
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
 static void scrub_page_get(struct scrub_page *spage);
 static void scrub_page_put(struct scrub_page *spage);
+static void scrub_parity_get(struct scrub_parity *sparity);
+static void scrub_parity_put(struct scrub_parity *sparity);
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 				    struct scrub_page *spage);
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -790,6 +840,20 @@ out:
 	scrub_pending_trans_workers_dec(sctx);
 }
 
+static inline void scrub_get_recover(struct scrub_recover *recover)
+{
+	atomic_inc(&recover->refs);
+}
+
+static inline void scrub_put_recover(struct scrub_recover *recover)
+{
+	if (atomic_dec_and_test(&recover->refs)) {
+		kfree(recover->bbio);
+		kfree(recover->raid_map);
+		kfree(recover);
+	}
+}
+
 /*
  * scrub_handle_errored_block gets called when either verification of the
  * pages failed or the bio failed to read, e.g. with EIO. In the latter
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
 	/* build and submit the bios for the failed mirror, check checksums */
 	scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
-			    csum, generation, sctx->csum_size);
+			    csum, generation, sctx->csum_size, 1);
 	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
 	    sblock_bad->no_io_error_seen) {
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 		 */
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.unverified_errors++;
+		sblock_to_check->data_corrected = 1;
 		spin_unlock(&sctx->stat_lock);
 
 		if (sctx->is_dev_replace)
@@ -1019,7 +1084,7 @@ nodatasum_case:
 		/* build and submit the bios, check checksums */
 		scrub_recheck_block(fs_info, sblock_other, is_metadata,
 				    have_csum, csum, generation,
-				    sctx->csum_size);
+				    sctx->csum_size, 0);
 
 		if (!sblock_other->header_error &&
 		    !sblock_other->checksum_error &&
@@ -1169,7 +1234,7 @@ nodatasum_case:
 			 */
 			scrub_recheck_block(fs_info, sblock_bad,
 					    is_metadata, have_csum, csum,
-					    generation, sctx->csum_size);
+					    generation, sctx->csum_size, 1);
 			if (!sblock_bad->header_error &&
 			    !sblock_bad->checksum_error &&
 			    sblock_bad->no_io_error_seen)
@@ -1180,6 +1245,7 @@ nodatasum_case:
 corrected_error:
 			spin_lock(&sctx->stat_lock);
 			sctx->stat.corrected_errors++;
+			sblock_to_check->data_corrected = 1;
 			spin_unlock(&sctx->stat_lock);
 			printk_ratelimited_in_rcu(KERN_ERR
 				"BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1201,11 +1267,18 @@ out:
 		     mirror_index++) {
 			struct scrub_block *sblock = sblocks_for_recheck +
 						     mirror_index;
+			struct scrub_recover *recover;
 			int page_index;
 
 			for (page_index = 0; page_index < sblock->page_count;
 			     page_index++) {
 				sblock->pagev[page_index]->sblock = NULL;
+				recover = sblock->pagev[page_index]->recover;
+				if (recover) {
+					scrub_put_recover(recover);
+					sblock->pagev[page_index]->recover =
+									NULL;
+				}
 				scrub_page_put(sblock->pagev[page_index]);
 			}
 		}
@@ -1215,14 +1288,63 @@ out:
 	return 0;
 }
 
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+{
+	if (raid_map) {
+		if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
+			return 3;
+		else
+			return 2;
+	} else {
+		return (int)bbio->num_stripes;
+	}
+}
+
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+						 u64 mapped_length,
+						 int nstripes, int mirror,
+						 int *stripe_index,
+						 u64 *stripe_offset)
+{
+	int i;
+
+	if (raid_map) {
+		/* RAID5/6 */
+		for (i = 0; i < nstripes; i++) {
+			if (raid_map[i] == RAID6_Q_STRIPE ||
+			    raid_map[i] == RAID5_P_STRIPE)
+				continue;
+
+			if (logical >= raid_map[i] &&
+			    logical < raid_map[i] + mapped_length)
+				break;
+		}
+
+		*stripe_index = i;
+		*stripe_offset = logical - raid_map[i];
+	} else {
+		/* The other RAID type */
+		*stripe_index = mirror;
+		*stripe_offset = 0;
+	}
+}
+
 static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 				     struct btrfs_fs_info *fs_info,
 				     struct scrub_block *original_sblock,
 				     u64 length, u64 logical,
 				     struct scrub_block *sblocks_for_recheck)
 {
+	struct scrub_recover *recover;
+	struct btrfs_bio *bbio;
+	u64 *raid_map;
+	u64 sublen;
+	u64 mapped_length;
+	u64 stripe_offset;
+	int stripe_index;
 	int page_index;
 	int mirror_index;
+	int nmirrors;
 	int ret;
 
 	/*
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 
 	page_index = 0;
 	while (length > 0) {
-		u64 sublen = min_t(u64, length, PAGE_SIZE);
-		u64 mapped_length = sublen;
-		struct btrfs_bio *bbio = NULL;
+		sublen = min_t(u64, length, PAGE_SIZE);
+		mapped_length = sublen;
+		bbio = NULL;
+		raid_map = NULL;
 
 		/*
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
-		ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
-				      &mapped_length, &bbio, 0);
+		ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
+				       &mapped_length, &bbio, 0, &raid_map);
 		if (ret || !bbio || mapped_length < sublen) {
 			kfree(bbio);
+			kfree(raid_map);
 			return -EIO;
 		}
 
+		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
+		if (!recover) {
+			kfree(bbio);
+			kfree(raid_map);
+			return -ENOMEM;
+		}
+
+		atomic_set(&recover->refs, 1);
+		recover->bbio = bbio;
+		recover->raid_map = raid_map;
+		recover->map_length = mapped_length;
+
 		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
-		for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
+
+		nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+		for (mirror_index = 0; mirror_index < nmirrors;
 		     mirror_index++) {
 			struct scrub_block *sblock;
 			struct scrub_page *page;
@@ -1265,26 +1403,38 @@ leave_nomem:
 				spin_lock(&sctx->stat_lock);
 				sctx->stat.malloc_errors++;
 				spin_unlock(&sctx->stat_lock);
-				kfree(bbio);
+				scrub_put_recover(recover);
 				return -ENOMEM;
 			}
 			scrub_page_get(page);
 			sblock->pagev[page_index] = page;
 			page->logical = logical;
-			page->physical = bbio->stripes[mirror_index].physical;
+
+			scrub_stripe_index_and_offset(logical, raid_map,
+						      mapped_length,
+						      bbio->num_stripes,
+						      mirror_index,
+						      &stripe_index,
+						      &stripe_offset);
+			page->physical = bbio->stripes[stripe_index].physical +
+					 stripe_offset;
+			page->dev = bbio->stripes[stripe_index].dev;
+
 			BUG_ON(page_index >= original_sblock->page_count);
 			page->physical_for_dev_replace =
 				original_sblock->pagev[page_index]->
 				physical_for_dev_replace;
 			/* for missing devices, dev->bdev is NULL */
-			page->dev = bbio->stripes[mirror_index].dev;
 			page->mirror_num = mirror_index + 1;
 			sblock->page_count++;
 			page->page = alloc_page(GFP_NOFS);
 			if (!page->page)
 				goto leave_nomem;
+
+			scrub_get_recover(recover);
+			page->recover = recover;
 		}
-		kfree(bbio);
+		scrub_put_recover(recover);
 		length -= sublen;
 		logical += sublen;
 		page_index++;
@@ -1293,6 +1443,51 @@ leave_nomem:
 	return 0;
 }
 
+struct scrub_bio_ret {
+	struct completion event;
+	int error;
+};
+
+static void scrub_bio_wait_endio(struct bio *bio, int error)
+{
+	struct scrub_bio_ret *ret = bio->bi_private;
+
+	ret->error = error;
+	complete(&ret->event);
+}
+
+static inline int scrub_is_page_on_raid56(struct scrub_page *page)
+{
+	return page->recover && page->recover->raid_map;
+}
+
+static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
+					struct bio *bio,
+					struct scrub_page *page)
+{
+	struct scrub_bio_ret done;
+	int ret;
+
+	init_completion(&done.event);
+	done.error = 0;
+	bio->bi_iter.bi_sector = page->logical >> 9;
+	bio->bi_private = &done;
+	bio->bi_end_io = scrub_bio_wait_endio;
+
+	ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
+				    page->recover->raid_map,
+				    page->recover->map_length,
+				    page->mirror_num, 0);
+	if (ret)
+		return ret;
+
+	wait_for_completion(&done.event);
+	if (done.error)
+		return -EIO;
+
+	return 0;
+}
+
 /*
  * this function will check the on disk data for checksum errors, header
  * errors and read I/O errors. If any I/O errors happen, the exact pages
@@ -1303,7 +1498,7 @@ leave_nomem:
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 				struct scrub_block *sblock, int is_metadata,
 				int have_csum, u8 *csum, u64 generation,
-				u16 csum_size)
+				u16 csum_size, int retry_failed_mirror)
 {
 	int page_num;
 
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 			continue;
 		}
 		bio->bi_bdev = page->dev->bdev;
-		bio->bi_iter.bi_sector = page->physical >> 9;
 
 		bio_add_page(bio, page->page, PAGE_SIZE, 0);
-		if (btrfsic_submit_bio_wait(READ, bio))
-			sblock->no_io_error_seen = 0;
+		if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
+			if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
+				sblock->no_io_error_seen = 0;
+		} else {
+			bio->bi_iter.bi_sector = page->physical >> 9;
+
+			if (btrfsic_submit_bio_wait(READ, bio))
+				sblock->no_io_error_seen = 0;
+		}
 
 		bio_put(bio);
 	}
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
 {
 	int page_num;
 
+	/*
+	 * This block is used for the check of the parity on the source device,
+	 * so the data needn't be written into the destination device.
+	 */
+	if (sblock->sparity)
+		return;
+
 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		int ret;
 
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
 	if (atomic_dec_and_test(&sblock->ref_count)) {
 		int i;
 
+		if (sblock->sparity)
+			scrub_parity_put(sblock->sparity);
+
 		for (i = 0; i < sblock->page_count; i++)
 			scrub_page_put(sblock->pagev[i]);
 		kfree(sblock);
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	scrub_pending_bio_dec(sctx);
 }
 
+static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
+				       unsigned long *bitmap,
+				       u64 start, u64 len)
+{
+	int offset;
+	int nsectors;
+	int sectorsize = sparity->sctx->dev_root->sectorsize;
+
+	if (len >= sparity->stripe_len) {
+		bitmap_set(bitmap, 0, sparity->nsectors);
+		return;
+	}
+
+	start -= sparity->logic_start;
+	offset = (int)do_div(start, sparity->stripe_len);
+	offset /= sectorsize;
+	nsectors = (int)len / sectorsize;
+
+	if (offset + nsectors <= sparity->nsectors) {
+		bitmap_set(bitmap, offset, nsectors);
+		return;
+	}
+
+	bitmap_set(bitmap, offset, sparity->nsectors - offset);
+	bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
+}
+
+static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
+						   u64 start, u64 len)
+{
+	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
+}
+
+static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
+						  u64 start, u64 len)
+{
+	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
+}
+
 static void scrub_block_complete(struct scrub_block *sblock)
 {
+	int corrupted = 0;
+
 	if (!sblock->no_io_error_seen) {
+		corrupted = 1;
 		scrub_handle_errored_block(sblock);
 	} else {
 		/*
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
 		 * dev replace case, otherwise write here in dev replace
 		 * case.
 		 */
-		if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
+		corrupted = scrub_checksum(sblock);
+		if (!corrupted && sblock->sctx->is_dev_replace)
 			scrub_write_block_to_dev_replace(sblock);
 	}
+
+	if (sblock->sparity && corrupted && !sblock->data_corrected) {
+		u64 start = sblock->pagev[0]->logical;
+		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
+			  PAGE_SIZE;
+
+		scrub_parity_mark_sectors_error(sblock->sparity,
+						start, end - start);
+	}
 }
 
 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -2228,6 +2491,132 @@ behind_scrub_pages:
 	return 0;
 }
 
+static int scrub_pages_for_parity(struct scrub_parity *sparity,
+				  u64 logical, u64 len,
+				  u64 physical, struct btrfs_device *dev,
+				  u64 flags, u64 gen, int mirror_num, u8 *csum)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct scrub_block *sblock;
+	int index;
+
+	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+	if (!sblock) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+
+	/* one ref inside this function, plus one for each page added to
+	 * a bio later on */
+	atomic_set(&sblock->ref_count, 1);
+	sblock->sctx = sctx;
+	sblock->no_io_error_seen = 1;
+	sblock->sparity = sparity;
+	scrub_parity_get(sparity);
+
+	for (index = 0; len > 0; index++) {
+		struct scrub_page *spage;
+		u64 l = min_t(u64, len, PAGE_SIZE);
+
+		spage = kzalloc(sizeof(*spage), GFP_NOFS);
+		if (!spage) {
+leave_nomem:
+			spin_lock(&sctx->stat_lock);
+			sctx->stat.malloc_errors++;
+			spin_unlock(&sctx->stat_lock);
+			scrub_block_put(sblock);
+			return -ENOMEM;
+		}
+		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		/* For scrub block */
+		scrub_page_get(spage);
+		sblock->pagev[index] = spage;
+		/* For scrub parity */
+		scrub_page_get(spage);
+		list_add_tail(&spage->list, &sparity->spages);
+		spage->sblock = sblock;
+		spage->dev = dev;
+		spage->flags = flags;
+		spage->generation = gen;
+		spage->logical = logical;
+		spage->physical = physical;
+		spage->mirror_num = mirror_num;
+		if (csum) {
+			spage->have_csum = 1;
+			memcpy(spage->csum, csum, sctx->csum_size);
+		} else {
+			spage->have_csum = 0;
+		}
+		sblock->page_count++;
+		spage->page = alloc_page(GFP_NOFS);
+		if (!spage->page)
+			goto leave_nomem;
+		len -= l;
+		logical += l;
+		physical += l;
+	}
+
+	WARN_ON(sblock->page_count == 0);
+	for (index = 0; index < sblock->page_count; index++) {
+		struct scrub_page *spage = sblock->pagev[index];
+		int ret;
+
+		ret = scrub_add_page_to_rd_bio(sctx, spage);
+		if (ret) {
+			scrub_block_put(sblock);
+			return ret;
+		}
+	}
+
+	/* last one frees, either here or in bio completion for last page */
+	scrub_block_put(sblock);
+	return 0;
+}
+
+static int scrub_extent_for_parity(struct scrub_parity *sparity,
+				   u64 logical, u64 len,
+				   u64 physical, struct btrfs_device *dev,
+				   u64 flags, u64 gen, int mirror_num)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	int ret;
+	u8 csum[BTRFS_CSUM_SIZE];
+	u32 blocksize;
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA) {
+		blocksize = sctx->sectorsize;
+	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		blocksize = sctx->nodesize;
+	} else {
+		blocksize = sctx->sectorsize;
+		WARN_ON(1);
+	}
+
+	while (len) {
+		u64 l = min_t(u64, len, blocksize);
+		int have_csum = 0;
+
+		if (flags & BTRFS_EXTENT_FLAG_DATA) {
+			/* push csums to sbio */
+			have_csum = scrub_find_csum(sctx, logical, l, csum);
+			if (have_csum == 0)
+				goto skip;
+		}
+		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
+					     flags, gen, mirror_num,
+					     have_csum ? csum : NULL);
+skip:
+		if (ret)
+			return ret;
+		len -= l;
+		logical += l;
+		physical += l;
+	}
+	return 0;
+}
+
 /*
  * Given a physical address, this will calculate it's
  * logical offset. if this is a parity stripe, it will return
@@ -2236,7 +2625,8 @@ behind_scrub_pages:
  * return 0 if it is a data stripe, 1 means parity stripe.
  */
 static int get_raid56_logic_offset(u64 physical, int num,
-				   struct map_lookup *map, u64 *offset)
+				   struct map_lookup *map, u64 *offset,
+				   u64 *stripe_start)
 {
 	int i;
 	int j = 0;
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
 	last_offset = (physical - map->stripes[num].physical) *
 		      nr_data_stripes(map);
+	if (stripe_start)
+		*stripe_start = last_offset;
+
 	*offset = last_offset;
 	for (i = 0; i < nr_data_stripes(map); i++) {
 		*offset = last_offset + i * map->stripe_len;
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
 	return 1;
 }
 
+static void scrub_free_parity(struct scrub_parity *sparity)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct scrub_page *curr, *next;
+	int nbits;
+
+	nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
+	if (nbits) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.read_errors += nbits;
+		sctx->stat.uncorrectable_errors += nbits;
+		spin_unlock(&sctx->stat_lock);
+	}
+
+	list_for_each_entry_safe(curr, next, &sparity->spages, list) {
+		list_del_init(&curr->list);
+		scrub_page_put(curr);
+	}
+
+	kfree(sparity);
+}
+
+static void scrub_parity_bio_endio(struct bio *bio, int error)
+{
+	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
+	struct scrub_ctx *sctx = sparity->sctx;
+
+	if (error)
+		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+			  sparity->nsectors);
+
+	scrub_free_parity(sparity);
+	scrub_pending_bio_dec(sctx);
+	bio_put(bio);
+}
+
+static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct bio *bio;
+	struct btrfs_raid_bio *rbio;
+	struct scrub_page *spage;
+	struct btrfs_bio *bbio = NULL;
+	u64 *raid_map = NULL;
+	u64 length;
+	int ret;
+
+	if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
+			   sparity->nsectors))
+		goto out;
+
+	length = sparity->logic_end - sparity->logic_start + 1;
+	ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
+			       sparity->logic_start,
+			       &length, &bbio, 0, &raid_map);
+	if (ret || !bbio || !raid_map)
+		goto bbio_out;
+
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
+	if (!bio)
+		goto bbio_out;
+
+	bio->bi_iter.bi_sector = sparity->logic_start >> 9;
+	bio->bi_private = sparity;
+	bio->bi_end_io = scrub_parity_bio_endio;
+
+	rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
+					      raid_map, length,
+					      sparity->scrub_dev,
+					      sparity->dbitmap,
+					      sparity->nsectors);
+	if (!rbio)
+		goto rbio_out;
+
+	list_for_each_entry(spage, &sparity->spages, list)
+		raid56_parity_add_scrub_pages(rbio, spage->page,
+					      spage->logical);
+
+	scrub_pending_bio_inc(sctx);
+	raid56_parity_submit_scrub_rbio(rbio);
+	return;
+
+rbio_out:
+	bio_put(bio);
+bbio_out:
+	kfree(bbio);
+	kfree(raid_map);
+	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+		  sparity->nsectors);
+	spin_lock(&sctx->stat_lock);
+	sctx->stat.malloc_errors++;
+	spin_unlock(&sctx->stat_lock);
+out:
+	scrub_free_parity(sparity);
+}
+
+static inline int scrub_calc_parity_bitmap_len(int nsectors)
+{
+	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+}
+
+static void scrub_parity_get(struct scrub_parity *sparity)
+{
+	atomic_inc(&sparity->ref_count);
+}
+
+static void scrub_parity_put(struct scrub_parity *sparity)
+{
+	if (!atomic_dec_and_test(&sparity->ref_count))
+		return;
+
+	scrub_parity_check_and_repair(sparity);
+}
+
+static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
+						  struct map_lookup *map,
+						  struct btrfs_device *sdev,
+						  struct btrfs_path *path,
+						  u64 logic_start,
+						  u64 logic_end)
+{
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_root *csum_root = fs_info->csum_root;
+	struct btrfs_extent_item *extent;
+	u64 flags;
+	int ret;
+	int slot;
+	struct extent_buffer *l;
+	struct btrfs_key key;
+	u64 generation;
+	u64 extent_logical;
+	u64 extent_physical;
+	u64 extent_len;
+	struct btrfs_device *extent_dev;
+	struct scrub_parity *sparity;
+	int nsectors;
+	int bitmap_len;
+	int extent_mirror_num;
+	int stop_loop = 0;
+
+	nsectors = map->stripe_len / root->sectorsize;
+	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
+	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
+			  GFP_NOFS);
+	if (!sparity) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+
+	sparity->stripe_len = map->stripe_len;
+	sparity->nsectors = nsectors;
+	sparity->sctx = sctx;
+	sparity->scrub_dev = sdev;
+	sparity->logic_start = logic_start;
+	sparity->logic_end = logic_end;
+	atomic_set(&sparity->ref_count, 1);
+	INIT_LIST_HEAD(&sparity->spages);
+	sparity->dbitmap = sparity->bitmap;
+	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
+
+	ret = 0;
+	while (logic_start < logic_end) {
+		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+			key.type = BTRFS_METADATA_ITEM_KEY;
+		else
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.objectid = logic_start;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+
+		if (ret > 0) {
+			ret = btrfs_previous_extent_item(root, path, 0);
+			if (ret < 0)
+				goto out;
+			if (ret > 0) {
+				btrfs_release_path(path);
+				ret = btrfs_search_slot(NULL, root, &key,
+							path, 0, 0);
+				if (ret < 0)
+					goto out;
+			}
+		}
+
+		stop_loop = 0;
+		while (1) {
+			u64 bytes;
+
+			l = path->nodes[0];
+			slot = path->slots[0];
+			if (slot >= btrfs_header_nritems(l)) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret == 0)
+					continue;
+				if (ret < 0)
+					goto out;
+
+				stop_loop = 1;
+				break;
+			}
+			btrfs_item_key_to_cpu(l, &key, slot);
+
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				bytes = root->nodesize;
+			else
+				bytes = key.offset;
+
+			if (key.objectid + bytes <= logic_start)
+				goto next;
+
+			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+			    key.type != BTRFS_METADATA_ITEM_KEY)
+				goto next;
+
+			if (key.objectid > logic_end) {
+				stop_loop = 1;
+				break;
+			}
+
+			while (key.objectid >= logic_start + map->stripe_len)
+				logic_start += map->stripe_len;
+
+			extent = btrfs_item_ptr(l, slot,
+						struct btrfs_extent_item);
+			flags = btrfs_extent_flags(l, extent);
+			generation = btrfs_extent_generation(l, extent);
+
+			if (key.objectid < logic_start &&
+			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+				btrfs_err(fs_info,
+					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+					  key.objectid, logic_start);
+				goto next;
+			}
+again:
+			extent_logical = key.objectid;
+			extent_len = bytes;
+
+			if (extent_logical < logic_start) {
+				extent_len -= logic_start - extent_logical;
+				extent_logical = logic_start;
+			}
+
+			if (extent_logical + extent_len >
+			    logic_start + map->stripe_len)
+				extent_len = logic_start + map->stripe_len -
+					     extent_logical;
+
+			scrub_parity_mark_sectors_data(sparity, extent_logical,
+						       extent_len);
+
+			scrub_remap_extent(fs_info, extent_logical,
+					   extent_len, &extent_physical,
+					   &extent_dev,
+					   &extent_mirror_num);
+
+			ret = btrfs_lookup_csums_range(csum_root,
+						       extent_logical,
+						       extent_logical + extent_len - 1,
+						       &sctx->csum_list, 1);
+			if (ret)
+				goto out;
+
+			ret = scrub_extent_for_parity(sparity, extent_logical,
+						      extent_len,
+						      extent_physical,
+						      extent_dev, flags,
+						      generation,
+						      extent_mirror_num);
+			if (ret)
+				goto out;
+
+			scrub_free_csums(sctx);
+			if (extent_logical + extent_len <
+			    key.objectid + bytes) {
+				logic_start += map->stripe_len;
+
+				if (logic_start >= logic_end) {
+					stop_loop = 1;
+					break;
+				}
+
+				if (logic_start < key.objectid + bytes) {
+					cond_resched();
+					goto again;
+				}
+			}
+next:
+			path->slots[0]++;
+		}
+
+		btrfs_release_path(path);
+
+		if (stop_loop)
+			break;
+
+		logic_start += map->stripe_len;
+	}
+out:
+	if (ret < 0)
+		scrub_parity_mark_sectors_error(sparity, logic_start,
+						logic_end - logic_start + 1);
+	scrub_parity_put(sparity);
+	scrub_submit(sctx);
+	mutex_lock(&sctx->wr_ctx.wr_lock);
+	scrub_wr_submit(sctx);
+	mutex_unlock(&sctx->wr_ctx.wr_lock);
+
+	btrfs_release_path(path);
+	return ret < 0 ? ret : 0;
+}
+
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 					   struct map_lookup *map,
 					   struct btrfs_device *scrub_dev,
 					   int num, u64 base, u64 length,
 					   int is_dev_replace)
 {
-	struct btrfs_path *path;
+	struct btrfs_path *path, *ppath;
 	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_root *csum_root = fs_info->csum_root;
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 extent_logical;
 	u64 extent_physical;
 	u64 extent_len;
+	u64 stripe_logical;
+	u64 stripe_end;
 	struct btrfs_device *extent_dev;
 	int extent_mirror_num;
 	int stop_loop = 0;
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		mirror_num = num % map->num_stripes + 1;
 	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 				BTRFS_BLOCK_GROUP_RAID6)) {
-		get_raid56_logic_offset(physical, num, map, &offset);
+		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 		mirror_num = 1;
 	} else {
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (!path)
 		return -ENOMEM;
 
+	ppath = btrfs_alloc_path();
+	if (!ppath) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
 	/*
 	 * work on commit root. The related disk blocks are static as
 	 * long as COW is applied. This means, it is save to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 			 BTRFS_BLOCK_GROUP_RAID6)) {
 		get_raid56_logic_offset(physical_end, num,
-					map, &logic_end);
+					map, &logic_end, NULL);
 		logic_end += base;
 	} else {
 		logic_end = logical + increment * nstripes;
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 				 BTRFS_BLOCK_GROUP_RAID6)) {
 			ret = get_raid56_logic_offset(physical, num,
-					map, &logical);
+					map, &logical, &stripe_logical);
 			logical += base;
-			if (ret)
+			if (ret) {
+				stripe_logical += base;
+				stripe_end = stripe_logical + increment - 1;
+				ret = scrub_raid56_parity(sctx, map, scrub_dev,
+						ppath, stripe_logical,
+						stripe_end);
+				if (ret)
+					goto out;
 				goto skip;
+			}
 		}
 		/*
 		 * canceled?
@@ -2558,13 +3284,25 @@ again:
 			 * loop until we find next data stripe
 			 * or we have finished all stripes.
 			 */
-			do {
-				physical += map->stripe_len;
-				ret = get_raid56_logic_offset(
-						physical, num,
-						map, &logical);
-				logical += base;
-			} while (physical < physical_end && ret);
+loop:
+			physical += map->stripe_len;
+			ret = get_raid56_logic_offset(physical,
+					num, map, &logical,
+					&stripe_logical);
+			logical += base;
+
+			if (ret && physical < physical_end) {
+				stripe_logical += base;
+				stripe_end = stripe_logical +
+						increment - 1;
+				ret = scrub_raid56_parity(sctx,
+					map, scrub_dev, ppath,
+					stripe_logical,
+					stripe_end);
+				if (ret)
+					goto out;
+				goto loop;
+			}
 		} else {
 			physical += map->stripe_len;
 			logical += increment;
@@ -2605,6 +3343,7 @@ out:
 
 	blk_finish_plug(&plug);
 	btrfs_free_path(path);
+	btrfs_free_path(ppath);
 	return ret < 0 ? ret : 0;
 }
 
@@ -3310,6 +4049,50 @@ out:
 	scrub_pending_trans_workers_dec(sctx);
 }
 
+static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+				 u64 logical)
+{
+	struct extent_state *cached_state = NULL;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_io_tree *io_tree;
+	struct extent_map *em;
+	u64 lockstart = start, lockend = start + len - 1;
+	int ret = 0;
+
+	io_tree = &BTRFS_I(inode)->io_tree;
+
+	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+	if (ordered) {
+		btrfs_put_ordered_extent(ordered);
+		ret = 1;
+		goto out_unlock;
+	}
+
+	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out_unlock;
+	}
+
+	/*
+	 * This extent does not actually cover the logical extent anymore,
+	 * move on to the next inode.
+	 */
+	if (em->block_start > logical ||
+	    em->block_start + em->block_len < logical + len) {
+		free_extent_map(em);
+		ret = 1;
+		goto out_unlock;
+	}
+	free_extent_map(em);
+
+out_unlock:
+	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+			     GFP_NOFS);
+	return ret;
+}
+
 static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 				      struct scrub_copy_nocow_ctx *nocow_ctx)
 {
@@ -3318,13 +4101,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 	struct inode *inode;
 	struct page *page;
 	struct btrfs_root *local_root;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_map *em;
-	struct extent_state *cached_state = NULL;
 	struct extent_io_tree *io_tree;
 	u64 physical_for_dev_replace;
+	u64 nocow_ctx_logical;
 	u64 len = nocow_ctx->len;
-	u64 lockstart = offset, lockend = offset + len - 1;
 	unsigned long index;
 	int srcu_index;
 	int ret = 0;
@@ -3356,30 +4136,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
 	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
 	io_tree = &BTRFS_I(inode)->io_tree;
+	nocow_ctx_logical = nocow_ctx->logical;
 
-	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		goto out_unlock;
-	}
-
-	em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > nocow_ctx->logical ||
-	    em->block_start + em->block_len < nocow_ctx->logical + len) {
-		free_extent_map(em);
-		goto out_unlock;
+	ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+	if (ret) {
+		ret = ret > 0 ? 0 : ret;
+		goto out;
 	}
-	free_extent_map(em);
 
 	while (len >= PAGE_CACHE_SIZE) {
 		index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +4159,7 @@ again:
 			goto next_page;
 		} else {
 			ClearPageError(page);
-			err = extent_read_full_page_nolock(io_tree, page,
+			err = extent_read_full_page(io_tree, page,
 						   btrfs_get_extent,
 						   nocow_ctx->mirror_num);
 			if (err) {
@@ -3421,6 +4184,14 @@ again:
 				goto next_page;
 			}
 		}
+
+		ret = check_extent_to_block(inode, offset, len,
+					    nocow_ctx_logical);
+		if (ret) {
+			ret = ret > 0 ? 0 : ret;
+			goto next_page;
+		}
+
 		err = write_page_nocow(nocow_ctx->sctx,
 				       physical_for_dev_replace, page);
 		if (err)
@@ -3434,12 +4205,10 @@ next_page:
 
 		offset += PAGE_CACHE_SIZE;
 		physical_for_dev_replace += PAGE_CACHE_SIZE;
+		nocow_ctx_logical += PAGE_CACHE_SIZE;
 		len -= PAGE_CACHE_SIZE;
 	}
 	ret = COPY_COMPLETE;
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
-			     GFP_NOFS);
 out:
 	mutex_unlock(&inode->i_mutex);
 	iput(inode);