@@ -40,16 +40,26 @@
  *	- add a mode to also read unallocated space
  */
 
+struct scrub_block;
 struct scrub_dev;
 
 #define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
 #define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */
+#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
 struct scrub_page {
+	struct scrub_block	*sblock;
+	struct page		*page;
+	struct block_device	*bdev;
 	u64			flags;  /* extent flags */
 	u64			generation;
-	int			mirror_num;
-	int			have_csum;
+	u64			logical;
+	u64			physical;
+	struct {
+		unsigned int	mirror_num:8;
+		unsigned int	have_csum:1;
+		unsigned int	io_error:1;
+	};
 	u8			csum[BTRFS_CSUM_SIZE];
 };
 
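The layering introduced by this hunk: a scrub_page now carries everything needed to re-issue its own I/O (its page, block device, logical and physical offsets, mirror number and per-page io_error flag), and a scrub_block groups the pages that make up one checksummed unit (a tree node, leaf or data sector). A minimal sketch of how the two relate, using only the fields defined above; the helper name is made up for illustration and is not part of the patch:

/* Illustrative only: walk one block and count pages that saw an I/O error. */
static int sketch_count_io_errors(struct scrub_block *sblock)
{
	int i;
	int errors = 0;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev + i;

		if (spage->io_error)
			errors++;
	}
	return errors;
}

Because each page records its own location and device, a block can be re-read or rewritten page by page without consulting the bio it originally travelled in.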
@@ -60,12 +70,25 @@ struct scrub_bio {
 	int			err;
 	u64			logical;
 	u64			physical;
-	struct scrub_page	spag[SCRUB_PAGES_PER_BIO];
-	u64			count;
+	struct scrub_page	*pagev[SCRUB_PAGES_PER_BIO];
+	int			page_count;
 	int			next_free;
 	struct btrfs_work	work;
 };
 
+struct scrub_block {
+	struct scrub_page	pagev[SCRUB_MAX_PAGES_PER_BLOCK];
+	int			page_count;
+	atomic_t		outstanding_pages;
+	atomic_t		ref_count; /* free mem on transition to zero */
+	struct scrub_dev	*sdev;
+	struct {
+		unsigned int	header_error:1;
+		unsigned int	checksum_error:1;
+		unsigned int	no_io_error_seen:1;
+	};
+};
+
 struct scrub_dev {
 	struct scrub_bio	*bios[SCRUB_BIOS_PER_DEV];
 	struct btrfs_device	*dev;
@@ -79,6 +102,10 @@ struct scrub_dev {
 	struct list_head	csum_list;
 	atomic_t		cancel_req;
 	int			readonly;
+	int			pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
+	u32			sectorsize;
+	u32			nodesize;
+	u32			leafsize;
 	/*
 	 * statistics
 	 */
@@ -107,19 +134,41 @@ struct scrub_warning {
 	int			scratch_bufsize;
 };
 
+
+static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
+static int scrub_setup_recheck_block(struct scrub_dev *sdev,
+				     struct btrfs_mapping_tree *map_tree,
+				     u64 length, u64 logical,
+				     struct scrub_block *sblock);
+static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
+			       struct scrub_block *sblock, int is_metadata,
+			       int have_csum, u8 *csum, u64 generation,
+			       u16 csum_size);
+static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
+					 struct scrub_block *sblock,
+					 int is_metadata, int have_csum,
+					 const u8 *csum, u64 generation,
+					 u16 csum_size);
+static void scrub_complete_bio_end_io(struct bio *bio, int err);
+static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
+					     struct scrub_block *sblock_good,
+					     int force_write);
+static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
+					    struct scrub_block *sblock_good,
+					    int page_num, int force_write);
+static int scrub_checksum_data(struct scrub_block *sblock);
+static int scrub_checksum_tree_block(struct scrub_block *sblock);
+static int scrub_checksum_super(struct scrub_block *sblock);
+static void scrub_block_get(struct scrub_block *sblock);
+static void scrub_block_put(struct scrub_block *sblock);
+static int scrub_add_page_to_bio(struct scrub_dev *sdev,
+				 struct scrub_page *spage);
+static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
+		       u64 physical, u64 flags, u64 gen, int mirror_num,
+		       u8 *csum, int force);
 static void scrub_bio_end_io(struct bio *bio, int err);
-static void scrub_checksum(struct btrfs_work *work);
-static int scrub_checksum_data(struct scrub_dev *sdev,
-			       struct scrub_page *spag, void *buffer);
-static int scrub_checksum_tree_block(struct scrub_dev *sdev,
-				     struct scrub_page *spag, u64 logical,
-				     void *buffer);
-static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
-static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
-static void scrub_fixup_end_io(struct bio *bio, int err);
-static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
-			  struct page *page);
-static void scrub_fixup(struct scrub_bio *sbio, int ix);
+static void scrub_bio_end_io_worker(struct btrfs_work *work);
+static void scrub_block_complete(struct scrub_block *sblock);
 
 
 static void scrub_free_csums(struct scrub_dev *sdev)
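The new prototypes outline the reworked pipeline: scrub_pages() builds a scrub_block and its pages, scrub_add_page_to_bio() batches those pages into a scrub_bio, bio completion runs through scrub_bio_end_io()/scrub_bio_end_io_worker(), and scrub_block_complete()/scrub_checksum() verify a block once all of its pages are in, handing failures to scrub_handle_errored_block(). A minimal sketch of the per-page completion accounting implied by outstanding_pages and ref_count; the function name is hypothetical, the calls to scrub_block_complete() and scrub_block_put() are the helpers declared above:

/*
 * Sketch (not part of the patch): per-page completion accounting.
 * Each queued page holds one reference on its block; the page that
 * completes last triggers verification of the whole block.
 */
static void sketch_page_completed(struct scrub_page *spage, int io_error)
{
	struct scrub_block *sblock = spage->sblock;

	if (io_error) {
		spage->io_error = 1;
		sblock->no_io_error_seen = 0;
	}
	if (atomic_dec_and_test(&sblock->outstanding_pages))
		scrub_block_complete(sblock);	/* verify checksums, maybe repair */
	scrub_block_put(sblock);		/* drop this page's reference */
}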
@@ -133,23 +182,6 @@ static void scrub_free_csums(struct scrub_dev *sdev)
 	}
 }
 
-static void scrub_free_bio(struct bio *bio)
-{
-	int i;
-	struct page *last_page = NULL;
-
-	if (!bio)
-		return;
-
-	for (i = 0; i < bio->bi_vcnt; ++i) {
-		if (bio->bi_io_vec[i].bv_page == last_page)
-			continue;
-		last_page = bio->bi_io_vec[i].bv_page;
-		__free_page(last_page);
-	}
-	bio_put(bio);
-}
-
 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 {
 	int i;
@@ -157,13 +189,23 @@ static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 	if (!sdev)
 		return;
 
+	/* this can happen when scrub is cancelled */
+	if (sdev->curr != -1) {
+		struct scrub_bio *sbio = sdev->bios[sdev->curr];
+
+		for (i = 0; i < sbio->page_count; i++) {
+			BUG_ON(!sbio->pagev[i]);
+			BUG_ON(!sbio->pagev[i]->page);
+			scrub_block_put(sbio->pagev[i]->sblock);
+		}
+		bio_put(sbio->bio);
+	}
+
 	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 		struct scrub_bio *sbio = sdev->bios[i];
 
 		if (!sbio)
 			break;
-
-		scrub_free_bio(sbio->bio);
 		kfree(sbio);
 	}
 
@@ -177,11 +219,16 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 	struct scrub_dev *sdev;
 	int		i;
 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
+	int		pages_per_bio;
 
+	pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
+			      bio_get_nr_vecs(dev->bdev));
 	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
 	if (!sdev)
 		goto nomem;
 	sdev->dev = dev;
+	sdev->pages_per_bio = pages_per_bio;
+	sdev->curr = -1;
 	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 		struct scrub_bio *sbio;
 
@@ -192,8 +239,8 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 
 		sbio->index = i;
 		sbio->sdev = sdev;
-		sbio->count = 0;
-		sbio->work.func = scrub_checksum;
+		sbio->page_count = 0;
+		sbio->work.func = scrub_bio_end_io_worker;
 
 		if (i != SCRUB_BIOS_PER_DEV-1)
 			sdev->bios[i]->next_free = i + 1;
@@ -201,7 +248,9 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 			sdev->bios[i]->next_free = -1;
 	}
 	sdev->first_free = 0;
-	sdev->curr = -1;
+	sdev->nodesize = dev->dev_root->nodesize;
+	sdev->leafsize = dev->dev_root->leafsize;
+	sdev->sectorsize = dev->dev_root->sectorsize;
 	atomic_set(&sdev->in_flight, 0);
 	atomic_set(&sdev->fixup_cnt, 0);
 	atomic_set(&sdev->cancel_req, 0);
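Setup now caps pages_per_bio at what the underlying queue accepts per bio (bio_get_nr_vecs()), and sdev->curr = -1 marks "no bio currently being filled". Later in the patch, scrub_add_page_to_bio() appends a page to the current bio only while it stays contiguous and below that cap, otherwise the bio is submitted and a fresh one started. A minimal sketch of that decision, under the assumption of the fields introduced above; the helper name is invented for illustration:

/*
 * Sketch only: when does a page extend the bio being filled?
 * Mirrors the conditions used by scrub_add_page_to_bio() in this patch.
 */
static int sketch_page_fits_bio(struct scrub_bio *sbio, struct scrub_page *spage,
				int pages_per_bio)
{
	if (sbio->page_count >= pages_per_bio)
		return 0;	/* bio is full, submit it first */
	if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical)
		return 0;	/* not physically contiguous */
	if (sbio->logical + sbio->page_count * PAGE_SIZE != spage->logical)
		return 0;	/* not logically contiguous */
	return 1;
}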
@@ -292,10 +341,9 @@ err:
 	return 0;
 }
 
-static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
-				int ix)
+static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 {
-	struct btrfs_device *dev = sbio->sdev->dev;
+	struct btrfs_device *dev = sblock->sdev->dev;
 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
 	struct btrfs_path *path;
 	struct btrfs_key found_key;
@@ -314,8 +362,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
 
 	swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
 	swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
-	swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
-	swarn.logical = sbio->logical + ix * PAGE_SIZE;
+	BUG_ON(sblock->page_count < 1);
+	swarn.sector = (sblock->pagev[0].physical) >> 9;
+	swarn.logical = sblock->pagev[0].logical;
 	swarn.errstr = errstr;
 	swarn.dev = dev;
 	swarn.msg_bufsize = bufsize;
@@ -530,9 +579,9 @@ out:
 		spin_lock(&sdev->stat_lock);
 		++sdev->stat.uncorrectable_errors;
 		spin_unlock(&sdev->stat_lock);
-		printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
-			"(nodatasum) error at logical %llu\n",
-			fixup->logical);
+		printk_ratelimited(KERN_ERR
+			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+			(unsigned long long)fixup->logical, sdev->dev->name);
 	}
 
 	btrfs_free_path(path);
@@ -549,91 +598,168 @@
 }
 
 /*
- * scrub_recheck_error gets called when either verification of the page
- * failed or the bio failed to read, e.g. with EIO. In the latter case,
- * recheck_error gets called for every page in the bio, even though only
- * one may be bad
+ * scrub_handle_errored_block gets called when either verification of the
+ * pages failed or the bio failed to read, e.g. with EIO. In the latter
+ * case, this function handles all pages in the bio, even though only one
+ * may be bad.
+ * The goal of this function is to repair the errored block by using the
+ * contents of one of the mirrors.
 */
-static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
+static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 {
-	struct scrub_dev *sdev = sbio->sdev;
-	u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
+	struct scrub_dev *sdev = sblock_to_check->sdev;
+	struct btrfs_fs_info *fs_info;
+	u64 length;
+	u64 logical;
+	u64 generation;
+	unsigned int failed_mirror_index;
+	unsigned int is_metadata;
+	unsigned int have_csum;
+	u8 *csum;
+	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
+	struct scrub_block *sblock_bad;
+	int ret;
+	int mirror_index;
+	int page_num;
+	int success;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
-			DEFAULT_RATELIMIT_BURST);
+				      DEFAULT_RATELIMIT_BURST);
+
+	BUG_ON(sblock_to_check->page_count < 1);
+	fs_info = sdev->dev->dev_root->fs_info;
+	length = sblock_to_check->page_count * PAGE_SIZE;
+	logical = sblock_to_check->pagev[0].logical;
+	generation = sblock_to_check->pagev[0].generation;
+	BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
+	failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
+	is_metadata = !(sblock_to_check->pagev[0].flags &
+			BTRFS_EXTENT_FLAG_DATA);
+	have_csum = sblock_to_check->pagev[0].have_csum;
+	csum = sblock_to_check->pagev[0].csum;
 
-	if (sbio->err) {
-		if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
-				   sbio->bio->bi_io_vec[ix].bv_page) == 0) {
-			if (scrub_fixup_check(sbio, ix) == 0)
-				return 0;
-		}
-		if (__ratelimit(&_rs))
-			scrub_print_warning("i/o error", sbio, ix);
-	} else {
-		if (__ratelimit(&_rs))
-			scrub_print_warning("checksum error", sbio, ix);
+	/*
+	 * read all mirrors one after the other. This includes to
+	 * re-read the extent or metadata block that failed (that was
+	 * the cause that this fixup code is called) another time,
+	 * page by page this time in order to know which pages
+	 * caused I/O errors and which ones are good (for all mirrors).
+	 * It is the goal to handle the situation when more than one
+	 * mirror contains I/O errors, but the errors do not
+	 * overlap, i.e. the data can be repaired by selecting the
+	 * pages from those mirrors without I/O error on the
+	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
+	 * would be that mirror #1 has an I/O error on the first page,
+	 * the second page is good, and mirror #2 has an I/O error on
+	 * the second page, but the first page is good.
+	 * Then the first page of the first mirror can be repaired by
+	 * taking the first page of the second mirror, and the
+	 * second page of the second mirror can be repaired by
+	 * copying the contents of the 2nd page of the 1st mirror.
+	 * One more note: if the pages of one mirror contain I/O
+	 * errors, the checksum cannot be verified. In order to get
+	 * the best data for repairing, the first attempt is to find
+	 * a mirror without I/O errors and with a validated checksum.
+	 * Only if this is not possible, the pages are picked from
+	 * mirrors with I/O errors without considering the checksum.
+	 * If the latter is the case, at the end, the checksum of the
+	 * repaired area is verified in order to correctly maintain
+	 * the statistics.
+	 */
+
+	sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
+				      sizeof(*sblocks_for_recheck),
+				      GFP_NOFS);
+	if (!sblocks_for_recheck) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.malloc_errors++;
+		sdev->stat.read_errors++;
+		sdev->stat.uncorrectable_errors++;
+		spin_unlock(&sdev->stat_lock);
+		goto out;
 	}
 
-	spin_lock(&sdev->stat_lock);
-	++sdev->stat.read_errors;
-	spin_unlock(&sdev->stat_lock);
+	/* setup the context, map the logical blocks and alloc the pages */
+	ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
+					logical, sblocks_for_recheck);
+	if (ret) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.read_errors++;
+		sdev->stat.uncorrectable_errors++;
+		spin_unlock(&sdev->stat_lock);
+		goto out;
+	}
+	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
+	sblock_bad = sblocks_for_recheck + failed_mirror_index;
 
-	scrub_fixup(sbio, ix);
-	return 1;
-}
+	/* build and submit the bios for the failed mirror, check checksums */
+	ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
+				  csum, generation, sdev->csum_size);
+	if (ret) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.read_errors++;
+		sdev->stat.uncorrectable_errors++;
+		spin_unlock(&sdev->stat_lock);
+		goto out;
+	}
 
-static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
-{
-	int ret = 1;
-	struct page *page;
-	void *buffer;
-	u64 flags = sbio->spag[ix].flags;
+	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
+	    sblock_bad->no_io_error_seen) {
+		/*
+		 * the error disappeared after reading page by page, or
+		 * the area was part of a huge bio and other parts of the
+		 * bio caused I/O errors, or the block layer merged several
+		 * read requests into one and the error is caused by a
+		 * different bio (usually one of the two latter cases is
+		 * the cause)
+		 */
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.unverified_errors++;
+		spin_unlock(&sdev->stat_lock);
 
-	page = sbio->bio->bi_io_vec[ix].bv_page;
-	buffer = kmap_atomic(page, KM_USER0);
-	if (flags & BTRFS_EXTENT_FLAG_DATA) {
-		ret = scrub_checksum_data(sbio->sdev,
-					  sbio->spag + ix, buffer);
-	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-		ret = scrub_checksum_tree_block(sbio->sdev,
-						sbio->spag + ix,
-						sbio->logical + ix * PAGE_SIZE,
-						buffer);
-	} else {
-		WARN_ON(1);
+		goto out;
 	}
-	kunmap_atomic(buffer, KM_USER0);
 
-	return ret;
-}
+	if (!sblock_bad->no_io_error_seen) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.read_errors++;
+		spin_unlock(&sdev->stat_lock);
+		if (__ratelimit(&_rs))
+			scrub_print_warning("i/o error", sblock_to_check);
+	} else if (sblock_bad->checksum_error) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.csum_errors++;
+		spin_unlock(&sdev->stat_lock);
+		if (__ratelimit(&_rs))
+			scrub_print_warning("checksum error", sblock_to_check);
+	} else if (sblock_bad->header_error) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.verify_errors++;
+		spin_unlock(&sdev->stat_lock);
+		if (__ratelimit(&_rs))
+			scrub_print_warning("checksum/header error",
+					    sblock_to_check);
+	}
 
-static void scrub_fixup_end_io(struct bio *bio, int err)
-{
-	complete((struct completion *)bio->bi_private);
-}
+	if (sdev->readonly)
+		goto did_not_correct_error;
+
+	if (!is_metadata && !have_csum) {
+		struct scrub_fixup_nodatasum *fixup_nodatasum;
 
-static void scrub_fixup(struct scrub_bio *sbio, int ix)
-{
-	struct scrub_dev *sdev = sbio->sdev;
-	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
-	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
-	struct btrfs_bio *bbio = NULL;
-	struct scrub_fixup_nodatasum *fixup;
-	u64 logical = sbio->logical + ix * PAGE_SIZE;
-	u64 length;
-	int i;
-	int ret;
-	DECLARE_COMPLETION_ONSTACK(complete);
-
-	if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
-	    (sbio->spag[ix].have_csum == 0)) {
-		fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
-		if (!fixup)
-			goto uncorrectable;
-		fixup->sdev = sdev;
-		fixup->logical = logical;
-		fixup->root = fs_info->extent_root;
-		fixup->mirror_num = sbio->spag[ix].mirror_num;
+		/*
+		 * !is_metadata and !have_csum, this means that the data
+		 * might not be COW'ed, that it might be modified
+		 * concurrently. The general strategy to work on the
+		 * commit root does not help in the case when COW is not
+		 * used.
+		 */
+		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
+		if (!fixup_nodatasum)
+			goto did_not_correct_error;
+		fixup_nodatasum->sdev = sdev;
+		fixup_nodatasum->logical = logical;
+		fixup_nodatasum->root = fs_info->extent_root;
+		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
 		/*
 		 * increment scrubs_running to prevent cancel requests from
 		 * completing as long as a fixup worker is running. we must also
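The repair strategy laid out in the comment above proceeds in two stages: first look for a mirror whose whole block came back without I/O or checksum errors and rewrite the bad copy from it; only when every mirror has some I/O error, patch the bad block page by page from whichever mirror read that particular page successfully, then re-verify the checksum for the statistics. A condensed, hedged sketch of the page-by-page fallback; the wrapper name and the nr_mirrors parameter are invented, while scrub_repair_page_from_good_copy() and the io_error flags are the ones this patch adds:

/*
 * Sketch only: for every page that failed in the bad copy, take the same
 * page from the first mirror that read it without error.
 */
static int sketch_repair_page_by_page(struct scrub_block *sblock_bad,
				      struct scrub_block *mirrors, int nr_mirrors)
{
	int page_num, mirror, success = 1;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		if (!sblock_bad->pagev[page_num].io_error)
			continue;
		for (mirror = 0; mirror < nr_mirrors; mirror++) {
			if (mirrors[mirror].pagev[page_num].io_error)
				continue;
			if (!scrub_repair_page_from_good_copy(sblock_bad,
							      &mirrors[mirror],
							      page_num, 0)) {
				sblock_bad->pagev[page_num].io_error = 0;
				break;	/* this page is repaired */
			}
		}
		if (sblock_bad->pagev[page_num].io_error)
			success = 0;	/* no mirror had a good copy */
	}
	return success;
}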
@@ -648,235 +774,529 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
|
|
atomic_inc(&fs_info->scrubs_paused);
|
|
atomic_inc(&fs_info->scrubs_paused);
|
|
mutex_unlock(&fs_info->scrub_lock);
|
|
mutex_unlock(&fs_info->scrub_lock);
|
|
atomic_inc(&sdev->fixup_cnt);
|
|
atomic_inc(&sdev->fixup_cnt);
|
|
- fixup->work.func = scrub_fixup_nodatasum;
|
|
|
|
- btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
|
|
|
|
- return;
|
|
|
|
|
|
+ fixup_nodatasum->work.func = scrub_fixup_nodatasum;
|
|
|
|
+ btrfs_queue_worker(&fs_info->scrub_workers,
|
|
|
|
+ &fixup_nodatasum->work);
|
|
|
|
+ goto out;
|
|
}
|
|
}
|
|
|
|
|
|
- length = PAGE_SIZE;
|
|
|
|
- ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
|
|
|
|
- &bbio, 0);
|
|
|
|
- if (ret || !bbio || length < PAGE_SIZE) {
|
|
|
|
- printk(KERN_ERR
|
|
|
|
- "scrub_fixup: btrfs_map_block failed us for %llu\n",
|
|
|
|
- (unsigned long long)logical);
|
|
|
|
- WARN_ON(1);
|
|
|
|
- kfree(bbio);
|
|
|
|
- return;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * now build and submit the bios for the other mirrors, check
|
|
|
|
+ * checksums
|
|
|
|
+ */
|
|
|
|
+ for (mirror_index = 0;
|
|
|
|
+ mirror_index < BTRFS_MAX_MIRRORS &&
|
|
|
|
+ sblocks_for_recheck[mirror_index].page_count > 0;
|
|
|
|
+ mirror_index++) {
|
|
|
|
+ if (mirror_index == failed_mirror_index)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ /* build and submit the bios, check checksums */
|
|
|
|
+ ret = scrub_recheck_block(fs_info,
|
|
|
|
+ sblocks_for_recheck + mirror_index,
|
|
|
|
+ is_metadata, have_csum, csum,
|
|
|
|
+ generation, sdev->csum_size);
|
|
|
|
+ if (ret)
|
|
|
|
+ goto did_not_correct_error;
|
|
}
|
|
}
|
|
|
|
|
|
- if (bbio->num_stripes == 1)
|
|
|
|
- /* there aren't any replicas */
|
|
|
|
- goto uncorrectable;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * first try to pick the mirror which is completely without I/O
|
|
|
|
+ * errors and also does not have a checksum error.
|
|
|
|
+ * If one is found, and if a checksum is present, the full block
|
|
|
|
+ * that is known to contain an error is rewritten. Afterwards
|
|
|
|
+ * the block is known to be corrected.
|
|
|
|
+ * If a mirror is found which is completely correct, and no
|
|
|
|
+ * checksum is present, only those pages are rewritten that had
|
|
|
|
+ * an I/O error in the block to be repaired, since it cannot be
|
|
|
|
+ * determined, which copy of the other pages is better (and it
|
|
|
|
+ * could happen otherwise that a correct page would be
|
|
|
|
+ * overwritten by a bad one).
|
|
|
|
+ */
|
|
|
|
+ for (mirror_index = 0;
|
|
|
|
+ mirror_index < BTRFS_MAX_MIRRORS &&
|
|
|
|
+ sblocks_for_recheck[mirror_index].page_count > 0;
|
|
|
|
+ mirror_index++) {
|
|
|
|
+ struct scrub_block *sblock_other = sblocks_for_recheck +
|
|
|
|
+ mirror_index;
|
|
|
|
+
|
|
|
|
+ if (!sblock_other->header_error &&
|
|
|
|
+ !sblock_other->checksum_error &&
|
|
|
|
+ sblock_other->no_io_error_seen) {
|
|
|
|
+ int force_write = is_metadata || have_csum;
|
|
|
|
+
|
|
|
|
+ ret = scrub_repair_block_from_good_copy(sblock_bad,
|
|
|
|
+ sblock_other,
|
|
|
|
+ force_write);
|
|
|
|
+ if (0 == ret)
|
|
|
|
+ goto corrected_error;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
/*
|
|
- * first find a good copy
|
|
|
|
|
|
+ * in case of I/O errors in the area that is supposed to be
|
|
|
|
+ * repaired, continue by picking good copies of those pages.
|
|
|
|
+ * Select the good pages from mirrors to rewrite bad pages from
|
|
|
|
+ * the area to fix. Afterwards verify the checksum of the block
|
|
|
|
+ * that is supposed to be repaired. This verification step is
|
|
|
|
+ * only done for the purpose of statistic counting and for the
|
|
|
|
+ * final scrub report, whether errors remain.
|
|
|
|
+ * A perfect algorithm could make use of the checksum and try
|
|
|
|
+ * all possible combinations of pages from the different mirrors
|
|
|
|
+ * until the checksum verification succeeds. For example, when
|
|
|
|
+ * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
|
|
|
|
+ * of mirror #2 is readable but the final checksum test fails,
|
|
|
|
+ * then the 2nd page of mirror #3 could be tried, whether now
|
|
|
|
+ * the final checksum succeedes. But this would be a rare
|
|
|
|
+ * exception and is therefore not implemented. At least it is
|
|
|
|
+ * avoided that the good copy is overwritten.
|
|
|
|
+ * A more useful improvement would be to pick the sectors
|
|
|
|
+ * without I/O error based on sector sizes (512 bytes on legacy
|
|
|
|
+ * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one
|
|
|
|
+ * mirror could be repaired by taking 512 byte of a different
|
|
|
|
+ * mirror, even if other 512 byte sectors in the same PAGE_SIZE
|
|
|
|
+ * area are unreadable.
|
|
*/
|
|
*/
|
|
- for (i = 0; i < bbio->num_stripes; ++i) {
|
|
|
|
- if (i + 1 == sbio->spag[ix].mirror_num)
|
|
|
|
- continue;
|
|
|
|
|
|
|
|
- if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev,
|
|
|
|
- bbio->stripes[i].physical >> 9,
|
|
|
|
- sbio->bio->bi_io_vec[ix].bv_page)) {
|
|
|
|
- /* I/O-error, this is not a good copy */
|
|
|
|
|
|
+ /* can only fix I/O errors from here on */
|
|
|
|
+ if (sblock_bad->no_io_error_seen)
|
|
|
|
+ goto did_not_correct_error;
|
|
|
|
+
|
|
|
|
+ success = 1;
|
|
|
|
+ for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
|
|
|
|
+ struct scrub_page *page_bad = sblock_bad->pagev + page_num;
|
|
|
|
+
|
|
|
|
+ if (!page_bad->io_error)
|
|
continue;
|
|
continue;
|
|
|
|
+
|
|
|
|
+ for (mirror_index = 0;
|
|
|
|
+ mirror_index < BTRFS_MAX_MIRRORS &&
|
|
|
|
+ sblocks_for_recheck[mirror_index].page_count > 0;
|
|
|
|
+ mirror_index++) {
|
|
|
|
+ struct scrub_block *sblock_other = sblocks_for_recheck +
|
|
|
|
+ mirror_index;
|
|
|
|
+ struct scrub_page *page_other = sblock_other->pagev +
|
|
|
|
+ page_num;
|
|
|
|
+
|
|
|
|
+ if (!page_other->io_error) {
|
|
|
|
+ ret = scrub_repair_page_from_good_copy(
|
|
|
|
+ sblock_bad, sblock_other, page_num, 0);
|
|
|
|
+ if (0 == ret) {
|
|
|
|
+ page_bad->io_error = 0;
|
|
|
|
+ break; /* succeeded for this page */
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- if (scrub_fixup_check(sbio, ix) == 0)
|
|
|
|
- break;
|
|
|
|
|
|
+ if (page_bad->io_error) {
|
|
|
|
+ /* did not find a mirror to copy the page from */
|
|
|
|
+ success = 0;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
- if (i == bbio->num_stripes)
|
|
|
|
- goto uncorrectable;
|
|
|
|
|
|
|
|
- if (!sdev->readonly) {
|
|
|
|
- /*
|
|
|
|
- * bi_io_vec[ix].bv_page now contains good data, write it back
|
|
|
|
- */
|
|
|
|
- if (scrub_fixup_io(WRITE, sdev->dev->bdev,
|
|
|
|
- (sbio->physical + ix * PAGE_SIZE) >> 9,
|
|
|
|
- sbio->bio->bi_io_vec[ix].bv_page)) {
|
|
|
|
- /* I/O-error, writeback failed, give up */
|
|
|
|
- goto uncorrectable;
|
|
|
|
|
|
+ if (success) {
|
|
|
|
+ if (is_metadata || have_csum) {
|
|
|
|
+ /*
|
|
|
|
+ * need to verify the checksum now that all
|
|
|
|
+ * sectors on disk are repaired (the write
|
|
|
|
+ * request for data to be repaired is on its way).
|
|
|
|
+ * Just be lazy and use scrub_recheck_block()
|
|
|
|
+ * which re-reads the data before the checksum
|
|
|
|
+ * is verified, but most likely the data comes out
|
|
|
|
+ * of the page cache.
|
|
|
|
+ */
|
|
|
|
+ ret = scrub_recheck_block(fs_info, sblock_bad,
|
|
|
|
+ is_metadata, have_csum, csum,
|
|
|
|
+ generation, sdev->csum_size);
|
|
|
|
+ if (!ret && !sblock_bad->header_error &&
|
|
|
|
+ !sblock_bad->checksum_error &&
|
|
|
|
+ sblock_bad->no_io_error_seen)
|
|
|
|
+ goto corrected_error;
|
|
|
|
+ else
|
|
|
|
+ goto did_not_correct_error;
|
|
|
|
+ } else {
|
|
|
|
+corrected_error:
|
|
|
|
+ spin_lock(&sdev->stat_lock);
|
|
|
|
+ sdev->stat.corrected_errors++;
|
|
|
|
+ spin_unlock(&sdev->stat_lock);
|
|
|
|
+ printk_ratelimited(KERN_ERR
|
|
|
|
+ "btrfs: fixed up error at logical %llu on dev %s\n",
|
|
|
|
+ (unsigned long long)logical, sdev->dev->name);
|
|
}
|
|
}
|
|
|
|
+ } else {
|
|
|
|
+did_not_correct_error:
|
|
|
|
+ spin_lock(&sdev->stat_lock);
|
|
|
|
+ sdev->stat.uncorrectable_errors++;
|
|
|
|
+ spin_unlock(&sdev->stat_lock);
|
|
|
|
+ printk_ratelimited(KERN_ERR
|
|
|
|
+ "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
|
|
|
|
+ (unsigned long long)logical, sdev->dev->name);
|
|
}
|
|
}
|
|
|
|
|
|
- kfree(bbio);
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.corrected_errors;
|
|
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
|
|
+out:
|
|
|
|
+ if (sblocks_for_recheck) {
|
|
|
|
+ for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
|
|
|
|
+ mirror_index++) {
|
|
|
|
+ struct scrub_block *sblock = sblocks_for_recheck +
|
|
|
|
+ mirror_index;
|
|
|
|
+ int page_index;
|
|
|
|
+
|
|
|
|
+ for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
|
|
|
|
+ page_index++)
|
|
|
|
+ if (sblock->pagev[page_index].page)
|
|
|
|
+ __free_page(
|
|
|
|
+ sblock->pagev[page_index].page);
|
|
|
|
+ }
|
|
|
|
+ kfree(sblocks_for_recheck);
|
|
|
|
+ }
|
|
|
|
|
|
- printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n",
|
|
|
|
- (unsigned long long)logical);
|
|
|
|
- return;
|
|
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
|
|
-uncorrectable:
|
|
|
|
- kfree(bbio);
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.uncorrectable_errors;
|
|
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
|
|
+static int scrub_setup_recheck_block(struct scrub_dev *sdev,
|
|
|
|
+ struct btrfs_mapping_tree *map_tree,
|
|
|
|
+ u64 length, u64 logical,
|
|
|
|
+ struct scrub_block *sblocks_for_recheck)
|
|
|
|
+{
|
|
|
|
+ int page_index;
|
|
|
|
+ int mirror_index;
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * note: the three members sdev, ref_count and outstanding_pages
|
|
|
|
+ * are not used (and not set) in the blocks that are used for
|
|
|
|
+ * the recheck procedure
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+ page_index = 0;
|
|
|
|
+ while (length > 0) {
|
|
|
|
+ u64 sublen = min_t(u64, length, PAGE_SIZE);
|
|
|
|
+ u64 mapped_length = sublen;
|
|
|
|
+ struct btrfs_bio *bbio = NULL;
|
|
|
|
|
|
- printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at "
|
|
|
|
- "logical %llu\n", (unsigned long long)logical);
|
|
|
|
|
|
+ /*
|
|
|
|
+ * with a length of PAGE_SIZE, each returned stripe
|
|
|
|
+ * represents one mirror
|
|
|
|
+ */
|
|
|
|
+ ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
|
|
|
|
+ &bbio, 0);
|
|
|
|
+ if (ret || !bbio || mapped_length < sublen) {
|
|
|
|
+ kfree(bbio);
|
|
|
|
+ return -EIO;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
|
|
|
|
+ for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
|
|
|
|
+ mirror_index++) {
|
|
|
|
+ struct scrub_block *sblock;
|
|
|
|
+ struct scrub_page *page;
|
|
|
|
+
|
|
|
|
+ if (mirror_index >= BTRFS_MAX_MIRRORS)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ sblock = sblocks_for_recheck + mirror_index;
|
|
|
|
+ page = sblock->pagev + page_index;
|
|
|
|
+ page->logical = logical;
|
|
|
|
+ page->physical = bbio->stripes[mirror_index].physical;
|
|
|
|
+ page->bdev = bbio->stripes[mirror_index].dev->bdev;
|
|
|
|
+ page->mirror_num = mirror_index + 1;
|
|
|
|
+ page->page = alloc_page(GFP_NOFS);
|
|
|
|
+ if (!page->page) {
|
|
|
|
+ spin_lock(&sdev->stat_lock);
|
|
|
|
+ sdev->stat.malloc_errors++;
|
|
|
|
+ spin_unlock(&sdev->stat_lock);
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+ }
|
|
|
|
+ sblock->page_count++;
|
|
|
|
+ }
|
|
|
|
+ kfree(bbio);
|
|
|
|
+ length -= sublen;
|
|
|
|
+ logical += sublen;
|
|
|
|
+ page_index++;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
|
|
|
|
- struct page *page)
|
|
|
|
|
|
+/*
|
|
|
|
+ * this function will check the on disk data for checksum errors, header
|
|
|
|
+ * errors and read I/O errors. If any I/O errors happen, the exact pages
|
|
|
|
+ * which are errored are marked as being bad. The goal is to enable scrub
|
|
|
|
+ * to take those pages that are not errored from all the mirrors so that
|
|
|
|
+ * the pages that are errored in the just handled mirror can be repaired.
|
|
|
|
+ */
|
|
|
|
+static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
|
|
|
|
+ struct scrub_block *sblock, int is_metadata,
|
|
|
|
+ int have_csum, u8 *csum, u64 generation,
|
|
|
|
+ u16 csum_size)
|
|
{
|
|
{
|
|
- struct bio *bio = NULL;
|
|
|
|
- int ret;
|
|
|
|
- DECLARE_COMPLETION_ONSTACK(complete);
|
|
|
|
|
|
+ int page_num;
|
|
|
|
|
|
- bio = bio_alloc(GFP_NOFS, 1);
|
|
|
|
- bio->bi_bdev = bdev;
|
|
|
|
- bio->bi_sector = sector;
|
|
|
|
- bio_add_page(bio, page, PAGE_SIZE, 0);
|
|
|
|
- bio->bi_end_io = scrub_fixup_end_io;
|
|
|
|
- bio->bi_private = &complete;
|
|
|
|
- btrfsic_submit_bio(rw, bio);
|
|
|
|
|
|
+ sblock->no_io_error_seen = 1;
|
|
|
|
+ sblock->header_error = 0;
|
|
|
|
+ sblock->checksum_error = 0;
|
|
|
|
|
|
- /* this will also unplug the queue */
|
|
|
|
- wait_for_completion(&complete);
|
|
|
|
|
|
+ for (page_num = 0; page_num < sblock->page_count; page_num++) {
|
|
|
|
+ struct bio *bio;
|
|
|
|
+ int ret;
|
|
|
|
+ struct scrub_page *page = sblock->pagev + page_num;
|
|
|
|
+ DECLARE_COMPLETION_ONSTACK(complete);
|
|
|
|
+
|
|
|
|
+ BUG_ON(!page->page);
|
|
|
|
+ bio = bio_alloc(GFP_NOFS, 1);
|
|
|
|
+ bio->bi_bdev = page->bdev;
|
|
|
|
+ bio->bi_sector = page->physical >> 9;
|
|
|
|
+ bio->bi_end_io = scrub_complete_bio_end_io;
|
|
|
|
+ bio->bi_private = &complete;
|
|
|
|
+
|
|
|
|
+ ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
|
|
|
|
+ if (PAGE_SIZE != ret) {
|
|
|
|
+ bio_put(bio);
|
|
|
|
+ return -EIO;
|
|
|
|
+ }
|
|
|
|
+ btrfsic_submit_bio(READ, bio);
|
|
|
|
|
|
- ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
|
|
|
|
- bio_put(bio);
|
|
|
|
- return ret;
|
|
|
|
|
|
+ /* this will also unplug the queue */
|
|
|
|
+ wait_for_completion(&complete);
|
|
|
|
+
|
|
|
|
+ page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
|
|
|
|
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
|
|
|
+ sblock->no_io_error_seen = 0;
|
|
|
|
+ bio_put(bio);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (sblock->no_io_error_seen)
|
|
|
|
+ scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
|
|
|
|
+ have_csum, csum, generation,
|
|
|
|
+ csum_size);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-static void scrub_bio_end_io(struct bio *bio, int err)
|
|
|
|
|
|
+static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
|
|
|
|
+ struct scrub_block *sblock,
|
|
|
|
+ int is_metadata, int have_csum,
|
|
|
|
+ const u8 *csum, u64 generation,
|
|
|
|
+ u16 csum_size)
|
|
{
|
|
{
|
|
- struct scrub_bio *sbio = bio->bi_private;
|
|
|
|
- struct scrub_dev *sdev = sbio->sdev;
|
|
|
|
- struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
|
|
|
|
|
|
+ int page_num;
|
|
|
|
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ u32 crc = ~(u32)0;
|
|
|
|
+ struct btrfs_root *root = fs_info->extent_root;
|
|
|
|
+ void *mapped_buffer;
|
|
|
|
+
|
|
|
|
+ BUG_ON(!sblock->pagev[0].page);
|
|
|
|
+ if (is_metadata) {
|
|
|
|
+ struct btrfs_header *h;
|
|
|
|
+
|
|
|
|
+ mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0);
|
|
|
|
+ h = (struct btrfs_header *)mapped_buffer;
|
|
|
|
+
|
|
|
|
+ if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
|
|
|
|
+ generation != le64_to_cpu(h->generation) ||
|
|
|
|
+ memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
|
|
|
|
+ memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
|
|
|
|
+ BTRFS_UUID_SIZE))
|
|
|
|
+ sblock->header_error = 1;
|
|
|
|
+ csum = h->csum;
|
|
|
|
+ } else {
|
|
|
|
+ if (!have_csum)
|
|
|
|
+ return;
|
|
|
|
|
|
- sbio->err = err;
|
|
|
|
- sbio->bio = bio;
|
|
|
|
|
|
+ mapped_buffer = kmap_atomic(sblock->pagev[0].page, KM_USER0);
|
|
|
|
+ }
|
|
|
|
|
|
- btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
|
|
|
|
|
|
+ for (page_num = 0;;) {
|
|
|
|
+ if (page_num == 0 && is_metadata)
|
|
|
|
+ crc = btrfs_csum_data(root,
|
|
|
|
+ ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
|
|
|
|
+ crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
|
|
|
|
+ else
|
|
|
|
+ crc = btrfs_csum_data(root, mapped_buffer, crc,
|
|
|
|
+ PAGE_SIZE);
|
|
|
|
+
|
|
|
|
+ kunmap_atomic(mapped_buffer, KM_USER0);
|
|
|
|
+ page_num++;
|
|
|
|
+ if (page_num >= sblock->page_count)
|
|
|
|
+ break;
|
|
|
|
+ BUG_ON(!sblock->pagev[page_num].page);
|
|
|
|
+
|
|
|
|
+ mapped_buffer = kmap_atomic(sblock->pagev[page_num].page,
|
|
|
|
+ KM_USER0);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ btrfs_csum_final(crc, calculated_csum);
|
|
|
|
+ if (memcmp(calculated_csum, csum, csum_size))
|
|
|
|
+ sblock->checksum_error = 1;
|
|
}
|
|
}
|
|
|
|
|
|
-static void scrub_checksum(struct btrfs_work *work)
|
|
|
|
|
|
+static void scrub_complete_bio_end_io(struct bio *bio, int err)
|
|
{
|
|
{
|
|
- struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
|
|
|
|
- struct scrub_dev *sdev = sbio->sdev;
|
|
|
|
- struct page *page;
|
|
|
|
- void *buffer;
|
|
|
|
- int i;
|
|
|
|
- u64 flags;
|
|
|
|
- u64 logical;
|
|
|
|
- int ret;
|
|
|
|
|
|
+ complete((struct completion *)bio->bi_private);
|
|
|
|
+}
|
|
|
|
|
|
- if (sbio->err) {
|
|
|
|
- ret = 0;
|
|
|
|
- for (i = 0; i < sbio->count; ++i)
|
|
|
|
- ret |= scrub_recheck_error(sbio, i);
|
|
|
|
- if (!ret) {
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.unverified_errors;
|
|
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
- }
|
|
|
|
|
|
+static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
|
|
|
|
+ struct scrub_block *sblock_good,
|
|
|
|
+ int force_write)
|
|
|
|
+{
|
|
|
|
+ int page_num;
|
|
|
|
+ int ret = 0;
|
|
|
|
|
|
- sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
|
|
|
- sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
|
|
|
|
- sbio->bio->bi_phys_segments = 0;
|
|
|
|
- sbio->bio->bi_idx = 0;
|
|
|
|
|
|
+ for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
|
|
|
|
+ int ret_sub;
|
|
|
|
|
|
- for (i = 0; i < sbio->count; i++) {
|
|
|
|
- struct bio_vec *bi;
|
|
|
|
- bi = &sbio->bio->bi_io_vec[i];
|
|
|
|
- bi->bv_offset = 0;
|
|
|
|
- bi->bv_len = PAGE_SIZE;
|
|
|
|
- }
|
|
|
|
- goto out;
|
|
|
|
|
|
+ ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
|
|
|
|
+ sblock_good,
|
|
|
|
+ page_num,
|
|
|
|
+ force_write);
|
|
|
|
+ if (ret_sub)
|
|
|
|
+ ret = ret_sub;
|
|
}
|
|
}
|
|
- for (i = 0; i < sbio->count; ++i) {
|
|
|
|
- page = sbio->bio->bi_io_vec[i].bv_page;
|
|
|
|
- buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
- flags = sbio->spag[i].flags;
|
|
|
|
- logical = sbio->logical + i * PAGE_SIZE;
|
|
|
|
- ret = 0;
|
|
|
|
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
|
|
|
|
- ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
|
|
|
|
- } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
|
|
|
|
- ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
|
|
|
|
- logical, buffer);
|
|
|
|
- } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
|
|
|
|
- BUG_ON(i);
|
|
|
|
- (void)scrub_checksum_super(sbio, buffer);
|
|
|
|
- } else {
|
|
|
|
- WARN_ON(1);
|
|
|
|
- }
|
|
|
|
- kunmap_atomic(buffer, KM_USER0);
|
|
|
|
- if (ret) {
|
|
|
|
- ret = scrub_recheck_error(sbio, i);
|
|
|
|
- if (!ret) {
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.unverified_errors;
|
|
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
- }
|
|
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
|
|
|
|
+ struct scrub_block *sblock_good,
|
|
|
|
+ int page_num, int force_write)
|
|
|
|
+{
|
|
|
|
+ struct scrub_page *page_bad = sblock_bad->pagev + page_num;
|
|
|
|
+ struct scrub_page *page_good = sblock_good->pagev + page_num;
|
|
|
|
+
|
|
|
|
+ BUG_ON(sblock_bad->pagev[page_num].page == NULL);
|
|
|
|
+ BUG_ON(sblock_good->pagev[page_num].page == NULL);
|
|
|
|
+ if (force_write || sblock_bad->header_error ||
|
|
|
|
+ sblock_bad->checksum_error || page_bad->io_error) {
|
|
|
|
+ struct bio *bio;
|
|
|
|
+ int ret;
|
|
|
|
+ DECLARE_COMPLETION_ONSTACK(complete);
|
|
|
|
+
|
|
|
|
+ bio = bio_alloc(GFP_NOFS, 1);
|
|
|
|
+ bio->bi_bdev = page_bad->bdev;
|
|
|
|
+ bio->bi_sector = page_bad->physical >> 9;
|
|
|
|
+ bio->bi_end_io = scrub_complete_bio_end_io;
|
|
|
|
+ bio->bi_private = &complete;
|
|
|
|
+
|
|
|
|
+ ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
|
|
|
|
+ if (PAGE_SIZE != ret) {
|
|
|
|
+ bio_put(bio);
|
|
|
|
+ return -EIO;
|
|
}
|
|
}
|
|
|
|
+ btrfsic_submit_bio(WRITE, bio);
|
|
|
|
+
|
|
|
|
+ /* this will also unplug the queue */
|
|
|
|
+ wait_for_completion(&complete);
|
|
|
|
+ bio_put(bio);
|
|
}
|
|
}
|
|
|
|
|
|
-out:
|
|
|
|
- scrub_free_bio(sbio->bio);
|
|
|
|
- sbio->bio = NULL;
|
|
|
|
- spin_lock(&sdev->list_lock);
|
|
|
|
- sbio->next_free = sdev->first_free;
|
|
|
|
- sdev->first_free = sbio->index;
|
|
|
|
- spin_unlock(&sdev->list_lock);
|
|
|
|
- atomic_dec(&sdev->in_flight);
|
|
|
|
- wake_up(&sdev->list_wait);
|
|
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void scrub_checksum(struct scrub_block *sblock)
|
|
|
|
+{
|
|
|
|
+ u64 flags;
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ BUG_ON(sblock->page_count < 1);
|
|
|
|
+ flags = sblock->pagev[0].flags;
|
|
|
|
+ ret = 0;
|
|
|
|
+ if (flags & BTRFS_EXTENT_FLAG_DATA)
|
|
|
|
+ ret = scrub_checksum_data(sblock);
|
|
|
|
+ else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
|
|
|
|
+ ret = scrub_checksum_tree_block(sblock);
|
|
|
|
+ else if (flags & BTRFS_EXTENT_FLAG_SUPER)
|
|
|
|
+ (void)scrub_checksum_super(sblock);
|
|
|
|
+ else
|
|
|
|
+ WARN_ON(1);
|
|
|
|
+ if (ret)
|
|
|
|
+ scrub_handle_errored_block(sblock);
|
|
}
|
|
}
|
|
|
|
|
|
-static int scrub_checksum_data(struct scrub_dev *sdev,
|
|
|
|
- struct scrub_page *spag, void *buffer)
|
|
|
|
|
|
+static int scrub_checksum_data(struct scrub_block *sblock)
|
|
{
|
|
{
|
|
|
|
+ struct scrub_dev *sdev = sblock->sdev;
|
|
u8 csum[BTRFS_CSUM_SIZE];
|
|
u8 csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ u8 *on_disk_csum;
|
|
|
|
+ struct page *page;
|
|
|
|
+ void *buffer;
|
|
u32 crc = ~(u32)0;
|
|
u32 crc = ~(u32)0;
|
|
int fail = 0;
|
|
int fail = 0;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
|
|
+ u64 len;
|
|
|
|
+ int index;
|
|
|
|
|
|
- if (!spag->have_csum)
|
|
|
|
|
|
+ BUG_ON(sblock->page_count < 1);
|
|
|
|
+ if (!sblock->pagev[0].have_csum)
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
- crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
|
|
|
|
|
|
+ on_disk_csum = sblock->pagev[0].csum;
|
|
|
|
+ page = sblock->pagev[0].page;
|
|
|
|
+ buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+
|
|
|
|
+ len = sdev->sectorsize;
|
|
|
|
+ index = 0;
|
|
|
|
+ for (;;) {
|
|
|
|
+ u64 l = min_t(u64, len, PAGE_SIZE);
|
|
|
|
+
|
|
|
|
+ crc = btrfs_csum_data(root, buffer, crc, l);
|
|
|
|
+ kunmap_atomic(buffer, KM_USER0);
|
|
|
|
+ len -= l;
|
|
|
|
+ if (len == 0)
|
|
|
|
+ break;
|
|
|
|
+ index++;
|
|
|
|
+ BUG_ON(index >= sblock->page_count);
|
|
|
|
+ BUG_ON(!sblock->pagev[index].page);
|
|
|
|
+ page = sblock->pagev[index].page;
|
|
|
|
+ buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+ }
|
|
|
|
+
|
|
btrfs_csum_final(crc, csum);
|
|
btrfs_csum_final(crc, csum);
|
|
- if (memcmp(csum, spag->csum, sdev->csum_size))
|
|
|
|
|
|
+ if (memcmp(csum, on_disk_csum, sdev->csum_size))
|
|
fail = 1;
|
|
fail = 1;
|
|
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.data_extents_scrubbed;
|
|
|
|
- sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
|
|
|
|
- if (fail)
|
|
|
|
|
|
+ if (fail) {
|
|
|
|
+ spin_lock(&sdev->stat_lock);
|
|
++sdev->stat.csum_errors;
|
|
++sdev->stat.csum_errors;
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
|
|
+ spin_unlock(&sdev->stat_lock);
|
|
|
|
+ }
|
|
|
|
|
|
return fail;
|
|
return fail;
|
|
}
|
|
}
|
|
|
|
|
|
-static int scrub_checksum_tree_block(struct scrub_dev *sdev,
|
|
|
|
- struct scrub_page *spag, u64 logical,
|
|
|
|
- void *buffer)
|
|
|
|
|
|
+static int scrub_checksum_tree_block(struct scrub_block *sblock)
|
|
{
|
|
{
|
|
|
|
+ struct scrub_dev *sdev = sblock->sdev;
|
|
struct btrfs_header *h;
|
|
struct btrfs_header *h;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
- u8 csum[BTRFS_CSUM_SIZE];
|
|
|
|
|
|
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ u8 on_disk_csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ struct page *page;
|
|
|
|
+ void *mapped_buffer;
|
|
|
|
+ u64 mapped_size;
|
|
|
|
+ void *p;
|
|
u32 crc = ~(u32)0;
|
|
u32 crc = ~(u32)0;
|
|
int fail = 0;
|
|
int fail = 0;
|
|
int crc_fail = 0;
|
|
int crc_fail = 0;
|
|
|
|
+ u64 len;
|
|
|
|
+ int index;
|
|
|
|
+
|
|
|
|
+ BUG_ON(sblock->page_count < 1);
|
|
|
|
+ page = sblock->pagev[0].page;
|
|
|
|
+ mapped_buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+ h = (struct btrfs_header *)mapped_buffer;
|
|
|
|
+ memcpy(on_disk_csum, h->csum, sdev->csum_size);
|
|
|
|
|
|
/*
|
|
/*
|
|
* we don't use the getter functions here, as we
|
|
* we don't use the getter functions here, as we
|
|
* a) don't have an extent buffer and
|
|
* a) don't have an extent buffer and
|
|
* b) the page is already kmapped
|
|
* b) the page is already kmapped
|
|
*/
|
|
*/
|
|
- h = (struct btrfs_header *)buffer;
|
|
|
|
|
|
|
|
- if (logical != le64_to_cpu(h->bytenr))
|
|
|
|
|
|
+ if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
- if (spag->generation != le64_to_cpu(h->generation))
|
|
|
|
|
|
+ if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
|
|
if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
|
|
@@ -886,51 +1306,99 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev,
|
|
BTRFS_UUID_SIZE))
|
|
BTRFS_UUID_SIZE))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
- crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
|
|
|
|
- PAGE_SIZE - BTRFS_CSUM_SIZE);
|
|
|
|
- btrfs_csum_final(crc, csum);
|
|
|
|
- if (memcmp(csum, h->csum, sdev->csum_size))
|
|
|
|
|
|
+ BUG_ON(sdev->nodesize != sdev->leafsize);
|
|
|
|
+ len = sdev->nodesize - BTRFS_CSUM_SIZE;
|
|
|
|
+ mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
|
|
|
|
+ p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
|
|
|
|
+ index = 0;
|
|
|
|
+ for (;;) {
|
|
|
|
+ u64 l = min_t(u64, len, mapped_size);
|
|
|
|
+
|
|
|
|
+ crc = btrfs_csum_data(root, p, crc, l);
|
|
|
|
+ kunmap_atomic(mapped_buffer, KM_USER0);
|
|
|
|
+ len -= l;
|
|
|
|
+ if (len == 0)
|
|
|
|
+ break;
|
|
|
|
+ index++;
|
|
|
|
+ BUG_ON(index >= sblock->page_count);
|
|
|
|
+ BUG_ON(!sblock->pagev[index].page);
|
|
|
|
+ page = sblock->pagev[index].page;
|
|
|
|
+ mapped_buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+ mapped_size = PAGE_SIZE;
|
|
|
|
+ p = mapped_buffer;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ btrfs_csum_final(crc, calculated_csum);
|
|
|
|
+ if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
|
|
++crc_fail;
|
|
++crc_fail;
|
|
|
|
|
|
- spin_lock(&sdev->stat_lock);
|
|
|
|
- ++sdev->stat.tree_extents_scrubbed;
|
|
|
|
- sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
|
|
|
|
- if (crc_fail)
|
|
|
|
- ++sdev->stat.csum_errors;
|
|
|
|
- if (fail)
|
|
|
|
- ++sdev->stat.verify_errors;
|
|
|
|
- spin_unlock(&sdev->stat_lock);
|
|
|
|
|
|
+ if (crc_fail || fail) {
|
|
|
|
+ spin_lock(&sdev->stat_lock);
|
|
|
|
+ if (crc_fail)
|
|
|
|
+ ++sdev->stat.csum_errors;
|
|
|
|
+ if (fail)
|
|
|
|
+ ++sdev->stat.verify_errors;
|
|
|
|
+ spin_unlock(&sdev->stat_lock);
|
|
|
|
+ }
|
|
|
|
|
|
return fail || crc_fail;
|
|
return fail || crc_fail;
|
|
}
|
|
}
|
|
|
|
|
|
-static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
|
|
|
|
|
|
+static int scrub_checksum_super(struct scrub_block *sblock)
|
|
{
|
|
{
|
|
struct btrfs_super_block *s;
|
|
struct btrfs_super_block *s;
|
|
- u64 logical;
|
|
|
|
- struct scrub_dev *sdev = sbio->sdev;
|
|
|
|
|
|
+ struct scrub_dev *sdev = sblock->sdev;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
struct btrfs_root *root = sdev->dev->dev_root;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
- u8 csum[BTRFS_CSUM_SIZE];
|
|
|
|
|
|
+ u8 calculated_csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ u8 on_disk_csum[BTRFS_CSUM_SIZE];
|
|
|
|
+ struct page *page;
|
|
|
|
+ void *mapped_buffer;
|
|
|
|
+ u64 mapped_size;
|
|
|
|
+ void *p;
|
|
u32 crc = ~(u32)0;
|
|
u32 crc = ~(u32)0;
|
|
int fail = 0;
|
|
int fail = 0;
|
|
|
|
+ u64 len;
|
|
|
|
+ int index;
|
|
|
|
|
|
- s = (struct btrfs_super_block *)buffer;
|
|
|
|
- logical = sbio->logical;
|
|
|
|
|
|
+ BUG_ON(sblock->page_count < 1);
|
|
|
|
+ page = sblock->pagev[0].page;
|
|
|
|
+ mapped_buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+ s = (struct btrfs_super_block *)mapped_buffer;
|
|
|
|
+ memcpy(on_disk_csum, s->csum, sdev->csum_size);
|
|
|
|
|
|
- if (logical != le64_to_cpu(s->bytenr))
|
|
|
|
|
|
+ if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
- if (sbio->spag[0].generation != le64_to_cpu(s->generation))
|
|
|
|
|
|
+ if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
|
|
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
- crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
|
|
|
|
- PAGE_SIZE - BTRFS_CSUM_SIZE);
|
|
|
|
- btrfs_csum_final(crc, csum);
|
|
|
|
- if (memcmp(csum, s->csum, sbio->sdev->csum_size))
|
|
|
|
|
|
+ len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
|
|
|
|
+ mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
|
|
|
|
+ p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
|
|
|
|
+ index = 0;
|
|
|
|
+ for (;;) {
|
|
|
|
+ u64 l = min_t(u64, len, mapped_size);
|
|
|
|
+
|
|
|
|
+ crc = btrfs_csum_data(root, p, crc, l);
|
|
|
|
+ kunmap_atomic(mapped_buffer, KM_USER0);
|
|
|
|
+ len -= l;
|
|
|
|
+ if (len == 0)
|
|
|
|
+ break;
|
|
|
|
+ index++;
|
|
|
|
+ BUG_ON(index >= sblock->page_count);
|
|
|
|
+ BUG_ON(!sblock->pagev[index].page);
|
|
|
|
+ page = sblock->pagev[index].page;
|
|
|
|
+ mapped_buffer = kmap_atomic(page, KM_USER0);
|
|
|
|
+ mapped_size = PAGE_SIZE;
|
|
|
|
+ p = mapped_buffer;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ btrfs_csum_final(crc, calculated_csum);
|
|
|
|
+ if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
|
|
++fail;
|
|
++fail;
|
|
|
|
|
|
if (fail) {
|
|
if (fail) {
|
|
@@ -947,6 +1415,23 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
|
|
return fail;
|
|
return fail;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static void scrub_block_get(struct scrub_block *sblock)
|
|
|
|
+{
|
|
|
|
+ atomic_inc(&sblock->ref_count);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void scrub_block_put(struct scrub_block *sblock)
|
|
|
|
+{
|
|
|
|
+ if (atomic_dec_and_test(&sblock->ref_count)) {
|
|
|
|
+ int i;
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < sblock->page_count; i++)
|
|
|
|
+ if (sblock->pagev[i].page)
|
|
|
|
+ __free_page(sblock->pagev[i].page);
|
|
|
|
+ kfree(sblock);
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
static void scrub_submit(struct scrub_dev *sdev)
|
|
static void scrub_submit(struct scrub_dev *sdev)
|
|
{
|
|
{
|
|
struct scrub_bio *sbio;
|
|
struct scrub_bio *sbio;
|
|
@@ -955,19 +1440,17 @@ static void scrub_submit(struct scrub_dev *sdev)
|
|
return;
|
|
return;
|
|
|
|
|
|
sbio = sdev->bios[sdev->curr];
|
|
sbio = sdev->bios[sdev->curr];
|
|
- sbio->err = 0;
|
|
|
|
sdev->curr = -1;
|
|
sdev->curr = -1;
|
|
atomic_inc(&sdev->in_flight);
|
|
atomic_inc(&sdev->in_flight);
|
|
|
|
|
|
btrfsic_submit_bio(READ, sbio->bio);
|
|
btrfsic_submit_bio(READ, sbio->bio);
|
|
}
|
|
}
|
|
|
|
|
|
-static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
|
|
|
|
- u64 physical, u64 flags, u64 gen, int mirror_num,
|
|
|
|
- u8 *csum, int force)
|
|
|
|
|
|
+static int scrub_add_page_to_bio(struct scrub_dev *sdev,
|
|
|
|
+ struct scrub_page *spage)
|
|
{
|
|
{
|
|
|
|
+ struct scrub_block *sblock = spage->sblock;
|
|
struct scrub_bio *sbio;
|
|
struct scrub_bio *sbio;
|
|
- struct page *page;
|
|
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
again:
|
|
again:
|
|
@@ -980,7 +1463,7 @@ again:
|
|
if (sdev->curr != -1) {
|
|
if (sdev->curr != -1) {
|
|
sdev->first_free = sdev->bios[sdev->curr]->next_free;
|
|
sdev->first_free = sdev->bios[sdev->curr]->next_free;
|
|
sdev->bios[sdev->curr]->next_free = -1;
|
|
sdev->bios[sdev->curr]->next_free = -1;
|
|
- sdev->bios[sdev->curr]->count = 0;
|
|
|
|
|
|
+ sdev->bios[sdev->curr]->page_count = 0;
|
|
spin_unlock(&sdev->list_lock);
|
|
spin_unlock(&sdev->list_lock);
|
|
} else {
|
|
} else {
|
|
spin_unlock(&sdev->list_lock);
|
|
spin_unlock(&sdev->list_lock);
|
|
@@ -988,53 +1471,200 @@ again:
|
|
 		}
 	}
 	sbio = sdev->bios[sdev->curr];
-	if (sbio->count == 0) {
+	if (sbio->page_count == 0) {
 		struct bio *bio;
 
-		sbio->physical = physical;
-		sbio->logical = logical;
-		bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
-		if (!bio)
-			return -ENOMEM;
+		sbio->physical = spage->physical;
+		sbio->logical = spage->logical;
+		bio = sbio->bio;
+		if (!bio) {
+			bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
+			if (!bio)
+				return -ENOMEM;
+			sbio->bio = bio;
+		}
 
 		bio->bi_private = sbio;
 		bio->bi_end_io = scrub_bio_end_io;
 		bio->bi_bdev = sdev->dev->bdev;
-		bio->bi_sector = sbio->physical >> 9;
+		bio->bi_sector = spage->physical >> 9;
 		sbio->err = 0;
-		sbio->bio = bio;
-	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
-		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
+	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
+		   spage->physical ||
+		   sbio->logical + sbio->page_count * PAGE_SIZE !=
+		   spage->logical) {
 		scrub_submit(sdev);
 		goto again;
 	}
-	sbio->spag[sbio->count].flags = flags;
-	sbio->spag[sbio->count].generation = gen;
-	sbio->spag[sbio->count].have_csum = 0;
-	sbio->spag[sbio->count].mirror_num = mirror_num;
-
-	page = alloc_page(GFP_NOFS);
-	if (!page)
-		return -ENOMEM;
 
-	ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0);
-	if (!ret) {
-		__free_page(page);
+	sbio->pagev[sbio->page_count] = spage;
+	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
+	if (ret != PAGE_SIZE) {
+		if (sbio->page_count < 1) {
+			bio_put(sbio->bio);
+			sbio->bio = NULL;
+			return -EIO;
+		}
 		scrub_submit(sdev);
 		goto again;
 	}
 
-	if (csum) {
-		sbio->spag[sbio->count].have_csum = 1;
-		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
+	scrub_block_get(sblock); /* one for the added page */
+	atomic_inc(&sblock->outstanding_pages);
+	sbio->page_count++;
+	if (sbio->page_count == sdev->pages_per_bio)
+		scrub_submit(sdev);
+
+	return 0;
+}
+
+static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
+		       u64 physical, u64 flags, u64 gen, int mirror_num,
+		       u8 *csum, int force)
+{
+	struct scrub_block *sblock;
+	int index;
+
+	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+	if (!sblock) {
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.malloc_errors++;
+		spin_unlock(&sdev->stat_lock);
+		return -ENOMEM;
+	}
+
+	/* one ref inside this function, plus one for each page later on */
+	atomic_set(&sblock->ref_count, 1);
+	sblock->sdev = sdev;
+	sblock->no_io_error_seen = 1;
+
+	for (index = 0; len > 0; index++) {
+		struct scrub_page *spage = sblock->pagev + index;
+		u64 l = min_t(u64, len, PAGE_SIZE);
+
+		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		spage->page = alloc_page(GFP_NOFS);
+		if (!spage->page) {
+			spin_lock(&sdev->stat_lock);
+			sdev->stat.malloc_errors++;
+			spin_unlock(&sdev->stat_lock);
+			while (index > 0) {
+				index--;
+				__free_page(sblock->pagev[index].page);
+			}
+			kfree(sblock);
+			return -ENOMEM;
+		}
+		spage->sblock = sblock;
+		spage->bdev = sdev->dev->bdev;
+		spage->flags = flags;
+		spage->generation = gen;
+		spage->logical = logical;
+		spage->physical = physical;
+		spage->mirror_num = mirror_num;
+		if (csum) {
+			spage->have_csum = 1;
+			memcpy(spage->csum, csum, sdev->csum_size);
+		} else {
+			spage->have_csum = 0;
+		}
+		sblock->page_count++;
+		len -= l;
+		logical += l;
+		physical += l;
+	}
+
+	BUG_ON(sblock->page_count == 0);
+	for (index = 0; index < sblock->page_count; index++) {
+		struct scrub_page *spage = sblock->pagev + index;
+		int ret;
+
+		ret = scrub_add_page_to_bio(sdev, spage);
+		if (ret) {
+			scrub_block_put(sblock);
+			return ret;
+		}
 	}
-	++sbio->count;
-	if (sbio->count == SCRUB_PAGES_PER_BIO || force)
+
+	if (force)
 		scrub_submit(sdev);
 
+	/* last one frees, either here or in bio completion for last page */
+	scrub_block_put(sblock);
 	return 0;
 }
 
+static void scrub_bio_end_io(struct bio *bio, int err)
+{
+	struct scrub_bio *sbio = bio->bi_private;
+	struct scrub_dev *sdev = sbio->sdev;
+	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
+
+	sbio->err = err;
+	sbio->bio = bio;
+
+	btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
+}
+
+static void scrub_bio_end_io_worker(struct btrfs_work *work)
+{
+	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
+	struct scrub_dev *sdev = sbio->sdev;
+	int i;
+
+	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
+	if (sbio->err) {
+		for (i = 0; i < sbio->page_count; i++) {
+			struct scrub_page *spage = sbio->pagev[i];
+
+			spage->io_error = 1;
+			spage->sblock->no_io_error_seen = 0;
+		}
+	}
+
+	/* now complete the scrub_block items that have all pages completed */
+	for (i = 0; i < sbio->page_count; i++) {
+		struct scrub_page *spage = sbio->pagev[i];
+		struct scrub_block *sblock = spage->sblock;
+
+		if (atomic_dec_and_test(&sblock->outstanding_pages))
+			scrub_block_complete(sblock);
+		scrub_block_put(sblock);
+	}
+
+	if (sbio->err) {
+		/* what is this good for??? */
+		sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+		sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
+		sbio->bio->bi_phys_segments = 0;
+		sbio->bio->bi_idx = 0;
+
+		for (i = 0; i < sbio->page_count; i++) {
+			struct bio_vec *bi;
+			bi = &sbio->bio->bi_io_vec[i];
+			bi->bv_offset = 0;
+			bi->bv_len = PAGE_SIZE;
+		}
+	}
+
+	bio_put(sbio->bio);
+	sbio->bio = NULL;
+	spin_lock(&sdev->list_lock);
+	sbio->next_free = sdev->first_free;
+	sdev->first_free = sbio->index;
+	spin_unlock(&sdev->list_lock);
+	atomic_dec(&sdev->in_flight);
+	wake_up(&sdev->list_wait);
+}
+
+static void scrub_block_complete(struct scrub_block *sblock)
+{
+	if (!sblock->no_io_error_seen)
+		scrub_handle_errored_block(sblock);
+	else
+		scrub_checksum(sblock);
+}
+
 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 			   u8 *csum)
 {
@@ -1042,7 +1672,6 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 	int ret = 0;
 	unsigned long i;
 	unsigned long num_sectors;
-	u32 sectorsize = sdev->dev->dev_root->sectorsize;
 
 	while (!list_empty(&sdev->csum_list)) {
 		sum = list_first_entry(&sdev->csum_list,
@@ -1060,7 +1689,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 	if (!sum)
 		return 0;
 
-	num_sectors = sum->len / sectorsize;
+	num_sectors = sum->len / sdev->sectorsize;
 	for (i = 0; i < num_sectors; ++i) {
 		if (sum->sums[i].bytenr == logical) {
 			memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
@@ -1081,9 +1710,28 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
 {
 	int ret;
 	u8 csum[BTRFS_CSUM_SIZE];
+	u32 blocksize;
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA) {
+		blocksize = sdev->sectorsize;
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.data_extents_scrubbed++;
+		sdev->stat.data_bytes_scrubbed += len;
+		spin_unlock(&sdev->stat_lock);
+	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		BUG_ON(sdev->nodesize != sdev->leafsize);
+		blocksize = sdev->nodesize;
+		spin_lock(&sdev->stat_lock);
+		sdev->stat.tree_extents_scrubbed++;
+		sdev->stat.tree_bytes_scrubbed += len;
+		spin_unlock(&sdev->stat_lock);
+	} else {
+		blocksize = sdev->sectorsize;
+		BUG_ON(1);
+	}
 
 	while (len) {
-		u64 l = min_t(u64, len, PAGE_SIZE);
+		u64 l = min_t(u64, len, blocksize);
 		int have_csum = 0;
 
 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -1092,8 +1740,8 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
 			if (have_csum == 0)
 				++sdev->stat.no_csum;
 		}
-		ret = scrub_page(sdev, logical, l, physical, flags, gen,
-				 mirror_num, have_csum ? csum : NULL, 0);
+		ret = scrub_pages(sdev, logical, l, physical, flags, gen,
+				  mirror_num, have_csum ? csum : NULL, 0);
 		if (ret)
 			return ret;
 		len -= l;
@@ -1158,6 +1806,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 	if (!path)
 		return -ENOMEM;
 
+	/*
+	 * work on commit root. The related disk blocks are static as
+	 * long as COW is applied. This means it is safe to rewrite
+	 * them to repair disk errors without any race conditions.
+	 */
 	path->search_commit_root = 1;
 	path->skip_locking = 1;
 
@@ -1511,8 +2164,8 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
 		if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
 			break;
 
-		ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
-				 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
+		ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
+				  BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
 		if (ret)
 			return ret;
 	}
@@ -1571,10 +2224,30 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
 	/*
 	 * check some assumptions
 	 */
-	if (root->sectorsize != PAGE_SIZE ||
-	    root->sectorsize != root->leafsize ||
-	    root->sectorsize != root->nodesize) {
-		printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
+	if (root->nodesize != root->leafsize) {
+		printk(KERN_ERR
+		       "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+		       root->nodesize, root->leafsize);
+		return -EINVAL;
+	}
+
+	if (root->nodesize > BTRFS_STRIPE_LEN) {
+		/*
+		 * in this case scrub is unable to calculate the checksum
+		 * the way scrub is implemented. Do not handle this
+		 * situation at all because it won't ever happen.
+		 */
+		printk(KERN_ERR
+		       "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+		       root->nodesize, BTRFS_STRIPE_LEN);
+		return -EINVAL;
+	}
+
+	if (root->sectorsize != PAGE_SIZE) {
+		/* not supported for data w/o checksums */
+		printk(KERN_ERR
+		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
+		       root->sectorsize, (unsigned long long)PAGE_SIZE);
 		return -EINVAL;
 	}
 