@@ -58,9 +58,23 @@
 */
#define RBIO_CACHE_READY_BIT	3

+/*
+ * bbio and raid_map are managed by the caller, so we shouldn't free
+ * them here.  Besides that, rbios with this flag set must not be
+ * cached, because we need raid_map to check whether two rbios cover
+ * the same stripe, and it is very likely that the caller has already
+ * freed raid_map, so don't cache those rbios.
+ */
+#define RBIO_HOLD_BBIO_MAP_BIT	4

#define RBIO_CACHE_SIZE 1024

+enum btrfs_rbio_ops {
+	BTRFS_RBIO_WRITE	= 0,
+	BTRFS_RBIO_READ_REBUILD	= 1,
+	BTRFS_RBIO_PARITY_SCRUB	= 2,
+};
+
struct btrfs_raid_bio {
	struct btrfs_fs_info *fs_info;
	struct btrfs_bio *bbio;
@@ -117,13 +131,16 @@ struct btrfs_raid_bio {
	/* number of data stripes (no p/q) */
	int nr_data;

+	int real_stripes;
+
+	int stripe_npages;
	/*
	 * set if we're doing a parity rebuild
	 * for a read from higher up, which is handled
	 * differently from a parity rebuild as part of
	 * rmw
	 */
-	int read_rebuild;
+	enum btrfs_rbio_ops operation;

	/* first bad stripe */
	int faila;
@@ -131,6 +148,7 @@ struct btrfs_raid_bio {
	/* second bad stripe (for raid6 use) */
	int failb;

+	int scrubp;
	/*
	 * number of pages needed to represent the full
	 * stripe
@@ -144,8 +162,13 @@ struct btrfs_raid_bio {
	 */
	int bio_list_bytes;

+	int generic_bio_cnt;
+
	atomic_t refs;

+	atomic_t stripes_pending;
+
+	atomic_t error;
	/*
	 * these are two arrays of pointers. We allocate the
	 * rbio big enough to hold them both and setup their
@@ -162,6 +185,11 @@ struct btrfs_raid_bio {
	 * here for faster lookup
	 */
	struct page **bio_pages;
+
+	/*
+	 * bitmap to record which horizontal stripes have data
+	 */
+	unsigned long *dbitmap;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -176,6 +204,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

+static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+					 int need_check);
+static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+
/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
@@ -324,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
+	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

@@ -577,11 +610,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
	    cur->raid_map[0])
		return 0;
-	/* reads can't merge with writes */
-	if (last->read_rebuild !=
-	    cur->read_rebuild) {
+	/* we can't merge with different operations */
+	if (last->operation != cur->operation)
+		return 0;
+	/*
+	 * A parity scrub rbio has already read the full stripe from the
+	 * drive; it checks and repairs the parity and writes out the new
+	 * results.
+	 *
+	 * We're not allowed to add any new bios to the
+	 * bio list here, anyone else that wants to
+	 * change this stripe needs to do their own rmw.
+	 */
+	if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
+	    cur->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;
-	}

	return 1;
}
@@ -601,7 +643,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
 */
static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
-	if (rbio->nr_data + 1 == rbio->bbio->num_stripes)
+	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;

	index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
@@ -772,11 +814,14 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

-			if (next->read_rebuild)
+			if (next->operation == BTRFS_RBIO_READ_REBUILD)
				async_read_rebuild(next);
-			else {
+			else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				async_rmw_stripe(next);
+			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
+				steal_rbio(rbio, next);
+				async_scrub_parity(next);
			}

			goto done_nolock;
@@ -796,6 +841,21 @@ done_nolock:
		remove_rbio_from_cache(rbio);
}

+static inline void
+__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
+{
+	if (need) {
+		kfree(raid_map);
+		kfree(bbio);
+	}
+}
+
+static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
+{
+	__free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
+			!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
+}
+
static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;
@@ -814,8 +874,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
			rbio->stripe_pages[i] = NULL;
		}
	}
-	kfree(rbio->raid_map);
-	kfree(rbio->bbio);
+
+	free_bbio_and_raid_map(rbio);
+
	kfree(rbio);
}

@@ -833,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
{
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *next;
+
+	if (rbio->generic_bio_cnt)
+		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+
	free_raid_bio(rbio);

	while (cur) {
@@ -858,13 +923,13 @@ static void raid_write_end_io(struct bio *bio, int err)

	bio_put(bio);

-	if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = 0;

	/* OK, we have read all the stripes we need to. */
-	if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
+	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
		err = -EIO;

	rbio_orig_end_io(rbio, err, 0);
@@ -925,16 +990,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
{
	struct btrfs_raid_bio *rbio;
	int nr_data = 0;
-	int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes);
+	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
+	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
+	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
	void *p;

-	rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2,
+	rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
+		       DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
			GFP_NOFS);
-	if (!rbio) {
-		kfree(raid_map);
-		kfree(bbio);
+	if (!rbio)
		return ERR_PTR(-ENOMEM);
-	}

	bio_list_init(&rbio->bio_list);
	INIT_LIST_HEAD(&rbio->plug_list);
@@ -946,9 +1011,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
	rbio->fs_info = root->fs_info;
	rbio->stripe_len = stripe_len;
	rbio->nr_pages = num_pages;
+	rbio->real_stripes = real_stripes;
+	rbio->stripe_npages = stripe_npages;
	rbio->faila = -1;
	rbio->failb = -1;
	atomic_set(&rbio->refs, 1);
+	atomic_set(&rbio->error, 0);
+	atomic_set(&rbio->stripes_pending, 0);

	/*
	 * the stripe_pages and bio_pages array point to the extra
@@ -957,11 +1026,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
	p = rbio + 1;
	rbio->stripe_pages = p;
	rbio->bio_pages = p + sizeof(struct page *) * num_pages;
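+	/*
+	 * the dbitmap storage follows the two page pointer arrays
+	 * in the same kzalloc'd buffer
+	 */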
+	rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;

-	if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-		nr_data = bbio->num_stripes - 2;
+	if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE)
+		nr_data = real_stripes - 2;
	else
-		nr_data = bbio->num_stripes - 1;
+		nr_data = real_stripes - 1;

	rbio->nr_data = nr_data;
	return rbio;
@@ -1073,7 +1143,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
{
	if (rbio->faila >= 0 || rbio->failb >= 0) {
-		BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1);
+		BUG_ON(rbio->faila == rbio->real_stripes - 1);
		__raid56_parity_recover(rbio);
	} else {
		finish_rmw(rbio);
@@ -1134,7 +1204,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
	struct btrfs_bio *bbio = rbio->bbio;
-	void *pointers[bbio->num_stripes];
+	void *pointers[rbio->real_stripes];
	int stripe_len = rbio->stripe_len;
	int nr_data = rbio->nr_data;
	int stripe;
@@ -1148,11 +1218,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)

	bio_list_init(&bio_list);

-	if (bbio->num_stripes - rbio->nr_data == 1) {
-		p_stripe = bbio->num_stripes - 1;
-	} else if (bbio->num_stripes - rbio->nr_data == 2) {
-		p_stripe = bbio->num_stripes - 2;
-		q_stripe = bbio->num_stripes - 1;
+	if (rbio->real_stripes - rbio->nr_data == 1) {
+		p_stripe = rbio->real_stripes - 1;
+	} else if (rbio->real_stripes - rbio->nr_data == 2) {
+		p_stripe = rbio->real_stripes - 2;
+		q_stripe = rbio->real_stripes - 1;
	} else {
		BUG();
	}
@@ -1169,7 +1239,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
	spin_unlock_irq(&rbio->bio_list_lock);

-	atomic_set(&rbio->bbio->error, 0);
+	atomic_set(&rbio->error, 0);

	/*
	 * now that we've set rmw_locked, run through the
@@ -1209,7 +1279,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
			SetPageUptodate(p);
			pointers[stripe++] = kmap(p);

-			raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE,
+			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
						pointers);
		} else {
			/* raid5 */
@@ -1218,7 +1288,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
		}


-		for (stripe = 0; stripe < bbio->num_stripes; stripe++)
+		for (stripe = 0; stripe < rbio->real_stripes; stripe++)
			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
	}

@@ -1227,7 +1297,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
	 * higher layers (the bio_list in our rbio) and our p/q. Ignore
	 * everything else.
	 */
-	for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
+	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
			struct page *page;
			if (stripe < rbio->nr_data) {
@@ -1245,8 +1315,34 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
		}
	}

-	atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list));
-	BUG_ON(atomic_read(&bbio->stripes_pending) == 0);
+	if (likely(!bbio->num_tgtdevs))
+		goto write_data;
+
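+	/*
+	 * Besides the normal stripe devices, queue a copy of the same pages
+	 * for any dev-replace target device recorded in bbio->tgtdev_map.
+	 */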
+	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+		if (!bbio->tgtdev_map[stripe])
+			continue;
+
+		for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+			struct page *page;
+			if (stripe < rbio->nr_data) {
+				page = page_in_rbio(rbio, stripe, pagenr, 1);
+				if (!page)
+					continue;
+			} else {
+				page = rbio_stripe_page(rbio, stripe, pagenr);
+			}
+
+			ret = rbio_add_io_page(rbio, &bio_list, page,
+					       rbio->bbio->tgtdev_map[stripe],
+					       pagenr, rbio->stripe_len);
+			if (ret)
+				goto cleanup;
+		}
+	}
+
+write_data:
+	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
+	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);

	while (1) {
		bio = bio_list_pop(&bio_list);
@@ -1283,7 +1379,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
		stripe = &rbio->bbio->stripes[i];
		stripe_start = stripe->physical;
		if (physical >= stripe_start &&
-		    physical < stripe_start + rbio->stripe_len) {
+		    physical < stripe_start + rbio->stripe_len &&
+		    bio->bi_bdev == stripe->dev->bdev) {
			return i;
		}
	}
@@ -1331,11 +1428,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
	if (rbio->faila == -1) {
		/* first failure on this rbio */
		rbio->faila = failed;
-		atomic_inc(&rbio->bbio->error);
+		atomic_inc(&rbio->error);
	} else if (rbio->failb == -1) {
		/* second failure on this rbio */
		rbio->failb = failed;
-		atomic_inc(&rbio->bbio->error);
+		atomic_inc(&rbio->error);
	} else {
		ret = -EIO;
	}
@@ -1394,11 +1491,11 @@ static void raid_rmw_end_io(struct bio *bio, int err)

	bio_put(bio);

-	if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = 0;
-	if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
+	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
		goto cleanup;

	/*
@@ -1439,7 +1536,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio)
static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
-	struct btrfs_bio *bbio = rbio->bbio;
	struct bio_list bio_list;
	int ret;
	int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1455,7 +1551,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)

	index_rbio_pages(rbio);

-	atomic_set(&rbio->bbio->error, 0);
+	atomic_set(&rbio->error, 0);
	/*
	 * build a list of bios to read all the missing parts of this
	 * stripe
@@ -1503,7 +1599,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
	 * the bbio may be freed once we submit the last bio. Make sure
	 * not to touch it after that
	 */
-	atomic_set(&bbio->stripes_pending, bios_to_read);
+	atomic_set(&rbio->stripes_pending, bios_to_read);
	while (1) {
		bio = bio_list_pop(&bio_list);
		if (!bio)
@@ -1686,19 +1782,30 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
	struct btrfs_raid_bio *rbio;
	struct btrfs_plug_cb *plug = NULL;
	struct blk_plug_cb *cb;
+	int ret;

	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
-	if (IS_ERR(rbio))
+	if (IS_ERR(rbio)) {
+		__free_bbio_and_raid_map(bbio, raid_map, 1);
		return PTR_ERR(rbio);
+	}
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;
+	rbio->operation = BTRFS_RBIO_WRITE;
+
+	btrfs_bio_counter_inc_noblocked(root->fs_info);
+	rbio->generic_bio_cnt = 1;

	/*
	 * don't plug on full rbios, just get them out the door
	 * as quickly as we can
	 */
-	if (rbio_is_full(rbio))
-		return full_stripe_write(rbio);
+	if (rbio_is_full(rbio)) {
+		ret = full_stripe_write(rbio);
+		if (ret)
+			btrfs_bio_counter_dec(root->fs_info);
+		return ret;
+	}

	cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
			       sizeof(*plug));
@@ -1709,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
			INIT_LIST_HEAD(&plug->rbio_list);
		}
		list_add_tail(&rbio->plug_list, &plug->rbio_list);
+		ret = 0;
	} else {
-		return __raid56_parity_write(rbio);
+		ret = __raid56_parity_write(rbio);
+		if (ret)
+			btrfs_bio_counter_dec(root->fs_info);
	}
-	return 0;
+	return ret;
}

/*
@@ -1730,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
	int err;
	int i;

-	pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *),
+	pointers = kzalloc(rbio->real_stripes * sizeof(void *),
			   GFP_NOFS);
	if (!pointers) {
		err = -ENOMEM;
@@ -1740,7 +1850,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
	faila = rbio->faila;
	failb = rbio->failb;

-	if (rbio->read_rebuild) {
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);
@@ -1749,15 +1859,23 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
	index_rbio_pages(rbio);

	for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+		/*
+		 * When doing parity scrub, we use the bitmap to mark the
+		 * horizontal stripes on which we have data; skip the others.
+		 */
+		if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
+		    !test_bit(pagenr, rbio->dbitmap))
+			continue;
+
		/* setup our array of pointers with pages
		 * from each stripe
		 */
-		for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
+		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
			/*
			 * if we're rebuilding a read, we have to use
			 * pages from the bio list
			 */
-			if (rbio->read_rebuild &&
+			if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
			    (stripe == faila || stripe == failb)) {
				page = page_in_rbio(rbio, stripe, pagenr, 0);
			} else {
@@ -1767,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
		}

		/* all raid6 handling here */
-		if (rbio->raid_map[rbio->bbio->num_stripes - 1] ==
+		if (rbio->raid_map[rbio->real_stripes - 1] ==
		    RAID6_Q_STRIPE) {

			/*
@@ -1817,10 +1935,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
			}

			if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
-				raid6_datap_recov(rbio->bbio->num_stripes,
+				raid6_datap_recov(rbio->real_stripes,
						  PAGE_SIZE, faila, pointers);
			} else {
-				raid6_2data_recov(rbio->bbio->num_stripes,
+				raid6_2data_recov(rbio->real_stripes,
						  PAGE_SIZE, faila, failb,
						  pointers);
			}
@@ -1850,7 +1968,7 @@ pstripe:
		 * know they can be trusted. If this was a read reconstruction,
		 * other endio functions will fiddle the uptodate bits
		 */
-		if (!rbio->read_rebuild) {
+		if (rbio->operation == BTRFS_RBIO_WRITE) {
			for (i = 0; i < nr_pages; i++) {
				if (faila != -1) {
					page = rbio_stripe_page(rbio, faila, i);
@@ -1862,12 +1980,12 @@ pstripe:
				}
			}
		}
-		for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
+		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
			/*
			 * if we're rebuilding a read, we have to use
			 * pages from the bio list
			 */
-			if (rbio->read_rebuild &&
+			if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
			    (stripe == faila || stripe == failb)) {
				page = page_in_rbio(rbio, stripe, pagenr, 0);
			} else {
@@ -1882,9 +2000,9 @@ cleanup:
	kfree(pointers);

cleanup_io:
-
-	if (rbio->read_rebuild) {
-		if (err == 0)
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
+		if (err == 0 &&
+		    !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
			cache_rbio_pages(rbio);
		else
			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -1893,7 +2011,13 @@ cleanup_io:
	} else if (err == 0) {
		rbio->faila = -1;
		rbio->failb = -1;
-		finish_rmw(rbio);
+
+		if (rbio->operation == BTRFS_RBIO_WRITE)
+			finish_rmw(rbio);
+		else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
+			finish_parity_scrub(rbio, 0);
+		else
+			BUG();
	} else {
		rbio_orig_end_io(rbio, err, 0);
	}
@@ -1917,10 +2041,10 @@ static void raid_recover_end_io(struct bio *bio, int err)
	set_bio_pages_uptodate(bio);
	bio_put(bio);

-	if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

-	if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
+	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
		rbio_orig_end_io(rbio, -EIO, 0);
	else
		__raid_recover_end_io(rbio);
@@ -1937,7 +2061,6 @@ static void raid_recover_end_io(struct bio *bio, int err)
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
-	struct btrfs_bio *bbio = rbio->bbio;
	struct bio_list bio_list;
	int ret;
	int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1951,16 +2074,16 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
	if (ret)
		goto cleanup;

-	atomic_set(&rbio->bbio->error, 0);
+	atomic_set(&rbio->error, 0);

	/*
	 * read everything that hasn't failed. Thanks to the
	 * stripe cache, it is possible that some or all of these
	 * pages are going to be uptodate.
	 */
-	for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
+	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		if (rbio->faila == stripe || rbio->failb == stripe) {
-			atomic_inc(&rbio->bbio->error);
+			atomic_inc(&rbio->error);
			continue;
		}

@@ -1990,7 +2113,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
	 * were up to date, or we might have no bios to read because
	 * the devices were gone.
	 */
-	if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) {
+	if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
		__raid_recover_end_io(rbio);
		goto out;
	} else {
@@ -2002,7 +2125,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
	 * the bbio may be freed once we submit the last bio. Make sure
	 * not to touch it after that
	 */
-	atomic_set(&bbio->stripes_pending, bios_to_read);
+	atomic_set(&rbio->stripes_pending, bios_to_read);
	while (1) {
		bio = bio_list_pop(&bio_list);
		if (!bio)
@@ -2021,7 +2144,7 @@ out:
	return 0;

cleanup:
-	if (rbio->read_rebuild)
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD)
		rbio_orig_end_io(rbio, -EIO, 0);
	return -EIO;
}
@@ -2034,34 +2157,42 @@ cleanup:
 */
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
			  struct btrfs_bio *bbio, u64 *raid_map,
-			  u64 stripe_len, int mirror_num)
+			  u64 stripe_len, int mirror_num, int generic_io)
{
	struct btrfs_raid_bio *rbio;
	int ret;

	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
-	if (IS_ERR(rbio))
+	if (IS_ERR(rbio)) {
+		__free_bbio_and_raid_map(bbio, raid_map, generic_io);
		return PTR_ERR(rbio);
+	}

-	rbio->read_rebuild = 1;
+	rbio->operation = BTRFS_RBIO_READ_REBUILD;
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;

	rbio->faila = find_logical_bio_stripe(rbio, bio);
	if (rbio->faila == -1) {
		BUG();
-		kfree(raid_map);
-		kfree(bbio);
+		__free_bbio_and_raid_map(bbio, raid_map, generic_io);
		kfree(rbio);
		return -EIO;
	}

+	if (generic_io) {
+		btrfs_bio_counter_inc_noblocked(root->fs_info);
+		rbio->generic_bio_cnt = 1;
+	} else {
+		set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags);
+	}
+
	/*
	 * reconstruct from the q stripe if they are
	 * asking for mirror 3
	 */
	if (mirror_num == 3)
-		rbio->failb = bbio->num_stripes - 2;
+		rbio->failb = rbio->real_stripes - 2;

	ret = lock_stripe_add(rbio);

@@ -2098,3 +2229,483 @@ static void read_rebuild_work(struct btrfs_work *work)
	rbio = container_of(work, struct btrfs_raid_bio, work);
	__raid56_parity_recover(rbio);
}
+
+/*
+ * The following code is used to scrub/replace the parity stripe.
+ *
+ * Note: We must make sure that the pages added to the scrub/replace raid
+ * bio are correct and will not be changed during the scrub/replace; that
+ * is, those pages only hold metadata or file data that is protected by a
+ * checksum.
+ */
+
+struct btrfs_raid_bio *
+raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
+			       struct btrfs_bio *bbio, u64 *raid_map,
+			       u64 stripe_len, struct btrfs_device *scrub_dev,
+			       unsigned long *dbitmap, int stripe_nsectors)
+{
+	struct btrfs_raid_bio *rbio;
+	int i;
+
+	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
+	if (IS_ERR(rbio))
+		return NULL;
+	bio_list_add(&rbio->bio_list, bio);
+	/*
+	 * This is a special bio which is used to hold the completion handler
+	 * and make the scrub rbio similar to the other types of rbio.
+	 */
+	ASSERT(!bio->bi_iter.bi_size);
+	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
+
+	for (i = 0; i < rbio->real_stripes; i++) {
+		if (bbio->stripes[i].dev == scrub_dev) {
+			rbio->scrubp = i;
+			break;
+		}
+	}
+
+	/* for now we only support the case where sectorsize equals PAGE_SIZE */
+	ASSERT(root->sectorsize == PAGE_SIZE);
+	ASSERT(rbio->stripe_npages == stripe_nsectors);
+	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
+
+	return rbio;
+}
+
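+/*
+ * Record a data page provided by the scrubber at the bio_pages slot that
+ * corresponds to its logical address within the full stripe.
+ */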
+void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
+				   struct page *page, u64 logical)
+{
+	int stripe_offset;
+	int index;
+
+	ASSERT(logical >= rbio->raid_map[0]);
+	ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] +
+	       rbio->stripe_len * rbio->nr_data);
+	stripe_offset = (int)(logical - rbio->raid_map[0]);
+	index = stripe_offset >> PAGE_CACHE_SHIFT;
+	rbio->bio_pages[index] = page;
+}
+
+/*
+ * We only scrub the parity for the horizontal stripes where we have correct
+ * data, so we don't need to allocate pages for every stripe.
+ */
+static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
+{
+	int i;
+	int bit;
+	int index;
+	struct page *page;
+
+	for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
+		for (i = 0; i < rbio->real_stripes; i++) {
+			index = i * rbio->stripe_npages + bit;
+			if (rbio->stripe_pages[index])
+				continue;
+
+			page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+			if (!page)
+				return -ENOMEM;
+			rbio->stripe_pages[index] = page;
+			ClearPageUptodate(page);
+		}
+	}
+	return 0;
+}
+
+/*
+ * end io function used by finish_parity_scrub. When we finally
+ * get here, we've written out the repaired parity.
+ */
+static void raid_write_parity_end_io(struct bio *bio, int err)
+{
+	struct btrfs_raid_bio *rbio = bio->bi_private;
+
+	if (err)
+		fail_bio_stripe(rbio, bio);
+
+	bio_put(bio);
+
+	if (!atomic_dec_and_test(&rbio->stripes_pending))
+		return;
+
+	err = 0;
+
+	if (atomic_read(&rbio->error))
+		err = -EIO;
+
+	rbio_orig_end_io(rbio, err, 0);
+}
+
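+/*
+ * Recompute the parity for every horizontal stripe marked in dbitmap and
+ * compare it with the parity we read from the device being scrubbed.  Only
+ * the pages whose parity was wrong are written back; for dev-replace, all
+ * of the checked parity pages are also copied to the replace target.  When
+ * need_check is zero the caller already knows the parity must be written,
+ * so the verification pass is skipped and we go straight to writeback.
+ */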
+static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+					 int need_check)
+{
+	struct btrfs_bio *bbio = rbio->bbio;
+	void *pointers[rbio->real_stripes];
+	DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+	int nr_data = rbio->nr_data;
+	int stripe;
+	int pagenr;
+	int p_stripe = -1;
+	int q_stripe = -1;
+	struct page *p_page = NULL;
+	struct page *q_page = NULL;
+	struct bio_list bio_list;
+	struct bio *bio;
+	int is_replace = 0;
+	int ret;
+
+	bio_list_init(&bio_list);
+
+	if (rbio->real_stripes - rbio->nr_data == 1) {
+		p_stripe = rbio->real_stripes - 1;
+	} else if (rbio->real_stripes - rbio->nr_data == 2) {
+		p_stripe = rbio->real_stripes - 2;
+		q_stripe = rbio->real_stripes - 1;
+	} else {
+		BUG();
+	}
+
+	if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
+		is_replace = 1;
+		bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
+	}
+
+	/*
+	 * The higher layers (the scrubber) are unlikely to use this area of
+	 * the disk again soon, so don't cache it.
+	 */
+	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
+
+	if (!need_check)
+		goto writeback;
+
+	p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (!p_page)
+		goto cleanup;
+	SetPageUptodate(p_page);
+
+	if (q_stripe != -1) {
+		q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!q_page) {
+			__free_page(p_page);
+			goto cleanup;
+		}
+		SetPageUptodate(q_page);
+	}
+
+	atomic_set(&rbio->error, 0);
+
+	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+		struct page *p;
+		void *parity;
+		/* first collect one page from each data stripe */
+		for (stripe = 0; stripe < nr_data; stripe++) {
+			p = page_in_rbio(rbio, stripe, pagenr, 0);
+			pointers[stripe] = kmap(p);
+		}
+
+		/* then add the parity stripe */
+		pointers[stripe++] = kmap(p_page);
+
+		if (q_stripe != -1) {
+
+			/*
+			 * raid6, add the qstripe and call the
+			 * library function to fill in our p/q
+			 */
+			pointers[stripe++] = kmap(q_page);
+
+			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
+						pointers);
+		} else {
+			/* raid5 */
+			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+			run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+		}
+
+		/* check the scrubbing parity and repair it */
+		p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+		parity = kmap(p);
+		if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
+			memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
+		else
+			/* parity is right, no need to write it back */
+			bitmap_clear(rbio->dbitmap, pagenr, 1);
+		kunmap(p);
+
+		for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+	}
+
+	__free_page(p_page);
+	if (q_page)
+		__free_page(q_page);
+
+writeback:
+	/*
+	 * time to start writing. Make bios for everything from the
+	 * higher layers (the bio_list in our rbio) and our p/q. Ignore
+	 * everything else.
+	 */
+	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+		struct page *page;
+
+		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+		ret = rbio_add_io_page(rbio, &bio_list,
+			       page, rbio->scrubp, pagenr, rbio->stripe_len);
+		if (ret)
+			goto cleanup;
+	}
+
+	if (!is_replace)
+		goto submit_write;
+
+	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
+		struct page *page;
+
+		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+		ret = rbio_add_io_page(rbio, &bio_list, page,
+				       bbio->tgtdev_map[rbio->scrubp],
+				       pagenr, rbio->stripe_len);
+		if (ret)
+			goto cleanup;
+	}
+
+submit_write:
+	nr_data = bio_list_size(&bio_list);
+	if (!nr_data) {
+		/* Every parity is right */
+		rbio_orig_end_io(rbio, 0, 0);
+		return;
+	}
+
+	atomic_set(&rbio->stripes_pending, nr_data);
+
+	while (1) {
+		bio = bio_list_pop(&bio_list);
+		if (!bio)
+			break;
+
+		bio->bi_private = rbio;
+		bio->bi_end_io = raid_write_parity_end_io;
+		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
+		submit_bio(WRITE, bio);
+	}
+	return;
+
+cleanup:
+	rbio_orig_end_io(rbio, -EIO, 0);
+}
+
+static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
+{
+	if (stripe >= 0 && stripe < rbio->nr_data)
+		return 1;
+	return 0;
+}
+
+/*
+ * While we're doing the parity check and repair, we could have errors
+ * in reading pages off the disk. This checks for errors and if we're
+ * not able to read the page it'll trigger parity reconstruction. The
+ * parity scrub will be finished after we've reconstructed the failed
+ * stripes
+ */
+static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
+{
+	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+		goto cleanup;
+
+	if (rbio->faila >= 0 || rbio->failb >= 0) {
+		int dfail = 0, failp = -1;
+
+		if (is_data_stripe(rbio, rbio->faila))
+			dfail++;
+		else if (is_parity_stripe(rbio->faila))
+			failp = rbio->faila;
+
+		if (is_data_stripe(rbio, rbio->failb))
+			dfail++;
+		else if (is_parity_stripe(rbio->failb))
+			failp = rbio->failb;
+
+		/*
+		 * Because we cannot use the parity that is being scrubbed to
+		 * repair data, our repair capability is reduced by one (in
+		 * the case of RAID5, we cannot repair anything).
+		 */
+		if (dfail > rbio->bbio->max_errors - 1)
+			goto cleanup;
+
+		/*
+		 * If all the data is good and only the parity is bad, just
+		 * repair the parity.
+		 */
+		if (dfail == 0) {
+			finish_parity_scrub(rbio, 0);
+			return;
+		}
+
+		/*
+		 * Getting here means we have one corrupted data stripe and
+		 * one corrupted parity on RAID6. If the corrupted parity is
+		 * the one being scrubbed, we can luckily use the other parity
+		 * to repair the data; otherwise we cannot repair the data
+		 * stripe.
+		 */
+		if (failp != rbio->scrubp)
+			goto cleanup;
+
+		__raid_recover_end_io(rbio);
+	} else {
+		finish_parity_scrub(rbio, 1);
+	}
+	return;
+
+cleanup:
+	rbio_orig_end_io(rbio, -EIO, 0);
+}
+
+/*
+ * end io for the read phase of the parity scrub cycle. All the bios here
+ * are physical stripe bios we've read from the disk so we can recalculate
+ * the parity of the stripe.
+ *
+ * This will usually kick off finish_parity_scrub once all the bios are read
+ * in, but it may trigger parity reconstruction if we had any errors along
+ * the way.
+ */
+static void raid56_parity_scrub_end_io(struct bio *bio, int err)
+{
+	struct btrfs_raid_bio *rbio = bio->bi_private;
+
+	if (err)
+		fail_bio_stripe(rbio, bio);
+	else
+		set_bio_pages_uptodate(bio);
+
+	bio_put(bio);
+
+	if (!atomic_dec_and_test(&rbio->stripes_pending))
+		return;
+
+	/*
+	 * this will normally call finish_parity_scrub to start our write,
+	 * but if there are any failed stripes we'll reconstruct
+	 * from parity first
+	 */
+	validate_rbio_for_parity_scrub(rbio);
+}
+
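+/*
+ * Read in every page we need for the horizontal stripes marked in dbitmap
+ * (anything not already in the bio list or the stripe cache), then run the
+ * parity check once the reads complete.
+ */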
+static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
+{
+	int bios_to_read = 0;
+	struct bio_list bio_list;
+	int ret;
+	int pagenr;
+	int stripe;
+	struct bio *bio;
+
+	ret = alloc_rbio_essential_pages(rbio);
+	if (ret)
+		goto cleanup;
+
+	bio_list_init(&bio_list);
+
+	atomic_set(&rbio->error, 0);
+	/*
+	 * build a list of bios to read all the missing parts of this
+	 * stripe
+	 */
+	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+		for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+			struct page *page;
+			/*
+			 * we want to find all the pages missing from
+			 * the rbio and read them from the disk. If
+			 * page_in_rbio finds a page in the bio list
+			 * we don't need to read it off the stripe.
+			 */
+			page = page_in_rbio(rbio, stripe, pagenr, 1);
+			if (page)
+				continue;
+
+			page = rbio_stripe_page(rbio, stripe, pagenr);
+			/*
+			 * the bio cache may have handed us an uptodate
+			 * page. If so, be happy and use it
+			 */
+			if (PageUptodate(page))
+				continue;
+
+			ret = rbio_add_io_page(rbio, &bio_list, page,
+				       stripe, pagenr, rbio->stripe_len);
+			if (ret)
+				goto cleanup;
+		}
+	}
+
+	bios_to_read = bio_list_size(&bio_list);
+	if (!bios_to_read) {
+		/*
+		 * this can happen if others have merged with
+		 * us, it means there is nothing left to read.
+		 * But if there are missing devices it may not be
+		 * safe to do the full stripe write yet.
+		 */
+		goto finish;
+	}
+
+	/*
+	 * the bbio may be freed once we submit the last bio. Make sure
+	 * not to touch it after that
+	 */
+	atomic_set(&rbio->stripes_pending, bios_to_read);
+	while (1) {
+		bio = bio_list_pop(&bio_list);
+		if (!bio)
+			break;
+
+		bio->bi_private = rbio;
+		bio->bi_end_io = raid56_parity_scrub_end_io;
+
+		btrfs_bio_wq_end_io(rbio->fs_info, bio,
+				    BTRFS_WQ_ENDIO_RAID56);
+
+		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
+		submit_bio(READ, bio);
+	}
+	/* the actual write will happen once the reads are done */
+	return;
+
+cleanup:
+	rbio_orig_end_io(rbio, -EIO, 0);
+	return;
+
+finish:
+	validate_rbio_for_parity_scrub(rbio);
+}
+
+static void scrub_parity_work(struct btrfs_work *work)
+{
+	struct btrfs_raid_bio *rbio;
+
+	rbio = container_of(work, struct btrfs_raid_bio, work);
+	raid56_parity_scrub_stripe(rbio);
+}
+
+static void async_scrub_parity(struct btrfs_raid_bio *rbio)
+{
+	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+			scrub_parity_work, NULL, NULL);
+
+	btrfs_queue_work(rbio->fs_info->rmw_workers,
+			 &rbio->work);
+}
+
+void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
+{
+	if (!lock_stripe_add(rbio))
+		async_scrub_parity(rbio);
+}