@@ -73,7 +73,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
struct r10conf *conf = data;
int size = offsetof(struct r10bio, devs[conf->copies]);

- /* allocate a r10bio with room for raid_disks entries in the bios array */
+ /* allocate a r10bio with room for raid_disks entries in the
+ * bios array */
return kzalloc(size, gfp_flags);
}

@@ -123,12 +124,19 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
if (!bio)
goto out_free_bio;
r10_bio->devs[j].bio = bio;
+ if (!conf->have_replacement)
+ continue;
+ bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+ if (!bio)
+ goto out_free_bio;
+ r10_bio->devs[j].repl_bio = bio;
}
/*
* Allocate RESYNC_PAGES data pages and attach them
* where needed.
*/
for (j = 0 ; j < nalloc; j++) {
+ struct bio *rbio = r10_bio->devs[j].repl_bio;
bio = r10_bio->devs[j].bio;
for (i = 0; i < RESYNC_PAGES; i++) {
if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
@@ -143,6 +151,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
goto out_free_pages;

bio->bi_io_vec[i].bv_page = page;
+ if (rbio)
+ rbio->bi_io_vec[i].bv_page = page;
}
}

@@ -156,8 +166,11 @@ out_free_pages:
safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
j = -1;
out_free_bio:
- while ( ++j < nalloc )
+ while (++j < nalloc) {
bio_put(r10_bio->devs[j].bio);
+ if (r10_bio->devs[j].repl_bio)
+ bio_put(r10_bio->devs[j].repl_bio);
+ }
r10bio_pool_free(r10_bio, conf);
return NULL;
}

@@ -178,6 +191,9 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
}
bio_put(bio);
}
+ bio = r10bio->devs[j].repl_bio;
+ if (bio)
+ bio_put(bio);
}
r10bio_pool_free(r10bio, conf);
}
@@ -191,6 +207,10 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
*bio = NULL;
+ bio = &r10_bio->devs[i].repl_bio;
+ if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
+ bio_put(*bio);
+ *bio = NULL;
}
}

@@ -275,19 +295,27 @@ static inline void update_head_pos(int slot, struct r10bio *r10_bio)
* Find the disk number which triggered given bio
*/
static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
- struct bio *bio, int *slotp)
+ struct bio *bio, int *slotp, int *replp)
{
int slot;
+ int repl = 0;

- for (slot = 0; slot < conf->copies; slot++)
+ for (slot = 0; slot < conf->copies; slot++) {
if (r10_bio->devs[slot].bio == bio)
break;
+ if (r10_bio->devs[slot].repl_bio == bio) {
+ repl = 1;
+ break;
+ }
+ }

BUG_ON(slot == conf->copies);
update_head_pos(slot, r10_bio);

if (slotp)
*slotp = slot;
+ if (replp)
+ *replp = repl;
return r10_bio->devs[slot].devnum;
}

@@ -296,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
+ struct md_rdev *rdev;
struct r10conf *conf = r10_bio->mddev->private;


slot = r10_bio->read_slot;
dev = r10_bio->devs[slot].devnum;
+ rdev = r10_bio->devs[slot].rdev;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -318,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
raid_end_bio_io(r10_bio);
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+ rdev_dec_pending(rdev, conf->mddev);
} else {
/*
* oops, read error - keep the refcount on the rdev
@@ -327,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
printk_ratelimited(KERN_ERR
"md/raid10:%s: %s: rescheduling sector %llu\n",
mdname(conf->mddev),
- bdevname(conf->mirrors[dev].rdev->bdev, b),
+ bdevname(rdev->bdev, b),
(unsigned long long)r10_bio->sector);
set_bit(R10BIO_ReadError, &r10_bio->state);
reschedule_retry(r10_bio);
@@ -366,17 +396,35 @@ static void raid10_end_write_request(struct bio *bio, int error)
int dev;
int dec_rdev = 1;
struct r10conf *conf = r10_bio->mddev->private;
- int slot;
+ int slot, repl;
+ struct md_rdev *rdev = NULL;

- dev = find_bio_disk(conf, r10_bio, bio, &slot);
+ dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);

+ if (repl)
+ rdev = conf->mirrors[dev].replacement;
+ if (!rdev) {
+ smp_rmb();
+ repl = 0;
+ rdev = conf->mirrors[dev].rdev;
+ }
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
if (!uptodate) {
- set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
- set_bit(R10BIO_WriteError, &r10_bio->state);
- dec_rdev = 0;
+ if (repl)
+ /* Never record new bad blocks to replacement,
+ * just fail it.
+ */
+ md_error(rdev->mddev, rdev);
+ else {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ if (!test_and_set_bit(WantReplacement, &rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED,
+ &rdev->mddev->recovery);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ dec_rdev = 0;
+ }
} else {
/*
* Set R10BIO_Uptodate in our master bio, so that
@@ -393,12 +441,15 @@ static void raid10_end_write_request(struct bio *bio, int error)
set_bit(R10BIO_Uptodate, &r10_bio->state);

/* Maybe we can clear some bad blocks. */
- if (is_badblock(conf->mirrors[dev].rdev,
+ if (is_badblock(rdev,
r10_bio->devs[slot].addr,
r10_bio->sectors,
&first_bad, &bad_sectors)) {
bio_put(bio);
- r10_bio->devs[slot].bio = IO_MADE_GOOD;
+ if (repl)
+ r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
+ else
+ r10_bio->devs[slot].bio = IO_MADE_GOOD;
dec_rdev = 0;
set_bit(R10BIO_MadeGood, &r10_bio->state);
}
@@ -414,7 +465,6 @@ static void raid10_end_write_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
}

-
/*
* RAID10 layout manager
* As well as the chunksize and raid_disks count, there are two
@@ -562,14 +612,16 @@ static int raid10_mergeable_bvec(struct request_queue *q,
* FIXME: possibly should rethink readbalancing and do it differently
* depending on near_copies / far_copies geometry.
*/
-static int read_balance(struct r10conf *conf, struct r10bio *r10_bio, int *max_sectors)
+static struct md_rdev *read_balance(struct r10conf *conf,
+ struct r10bio *r10_bio,
+ int *max_sectors)
{
const sector_t this_sector = r10_bio->sector;
int disk, slot;
int sectors = r10_bio->sectors;
int best_good_sectors;
sector_t new_distance, best_dist;
- struct md_rdev *rdev;
+ struct md_rdev *rdev, *best_rdev;
int do_balance;
int best_slot;

@@ -578,6 +630,7 @@ static int read_balance(struct r10conf *conf, struct r10bio *r10_bio, int *max_s
retry:
sectors = r10_bio->sectors;
best_slot = -1;
+ best_rdev = NULL;
best_dist = MaxSector;
best_good_sectors = 0;
do_balance = 1;
@@ -599,10 +652,16 @@ retry:
if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue;
disk = r10_bio->devs[slot].devnum;
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ rdev = rcu_dereference(conf->mirrors[disk].replacement);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
+ r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (rdev == NULL)
continue;
- if (!test_bit(In_sync, &rdev->flags))
+ if (test_bit(Faulty, &rdev->flags))
+ continue;
+ if (!test_bit(In_sync, &rdev->flags) &&
+ r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
continue;

dev_sector = r10_bio->devs[slot].addr;
@@ -627,6 +686,7 @@ retry:
if (good_sectors > best_good_sectors) {
best_good_sectors = good_sectors;
best_slot = slot;
+ best_rdev = rdev;
}
if (!do_balance)
/* Must read from here */
@@ -655,16 +715,15 @@ retry:
if (new_distance < best_dist) {
best_dist = new_distance;
best_slot = slot;
+ best_rdev = rdev;
}
}
- if (slot == conf->copies)
+ if (slot >= conf->copies) {
slot = best_slot;
+ rdev = best_rdev;
+ }

if (slot >= 0) {
- disk = r10_bio->devs[slot].devnum;
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
- if (!rdev)
- goto retry;
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
/* Cannot risk returning a device that failed
@@ -675,11 +734,11 @@ retry:
}
r10_bio->read_slot = slot;
} else
- disk = -1;
+ rdev = NULL;
rcu_read_unlock();
*max_sectors = best_good_sectors;

- return disk;
+ return rdev;
}

static int raid10_congested(void *data, int bits)
@@ -846,7 +905,6 @@ static void unfreeze_array(struct r10conf *conf)
static void make_request(struct mddev *mddev, struct bio * bio)
{
struct r10conf *conf = mddev->private;
- struct mirror_info *mirror;
struct r10bio *r10_bio;
struct bio *read_bio;
int i;
@@ -945,27 +1003,27 @@ static void make_request(struct mddev *mddev, struct bio * bio)
/*
* read balancing logic:
*/
- int disk;
+ struct md_rdev *rdev;
int slot;

read_again:
- disk = read_balance(conf, r10_bio, &max_sectors);
- slot = r10_bio->read_slot;
- if (disk < 0) {
+ rdev = read_balance(conf, r10_bio, &max_sectors);
+ if (!rdev) {
raid_end_bio_io(r10_bio);
return;
}
- mirror = conf->mirrors + disk;
+ slot = r10_bio->read_slot;

read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
max_sectors);

r10_bio->devs[slot].bio = read_bio;
+ r10_bio->devs[slot].rdev = rdev;

read_bio->bi_sector = r10_bio->devs[slot].addr +
- mirror->rdev->data_offset;
- read_bio->bi_bdev = mirror->rdev->bdev;
+ rdev->data_offset;
+ read_bio->bi_bdev = rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r10_bio;
@@ -1025,6 +1083,7 @@ read_again:
*/
plugged = mddev_check_plugged(mddev);

+ r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
retry_write:
blocked_rdev = NULL;
@@ -1034,12 +1093,25 @@ retry_write:
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
+ struct md_rdev *rrdev = rcu_dereference(
+ conf->mirrors[d].replacement);
+ if (rdev == rrdev)
+ rrdev = NULL;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
break;
}
+ if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+ atomic_inc(&rrdev->nr_pending);
+ blocked_rdev = rrdev;
+ break;
+ }
+ if (rrdev && test_bit(Faulty, &rrdev->flags))
+ rrdev = NULL;
+
r10_bio->devs[i].bio = NULL;
+ r10_bio->devs[i].repl_bio = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
set_bit(R10BIO_Degraded, &r10_bio->state);
continue;
@@ -1088,6 +1160,10 @@ retry_write:
}
r10_bio->devs[i].bio = bio;
atomic_inc(&rdev->nr_pending);
+ if (rrdev) {
+ r10_bio->devs[i].repl_bio = bio;
+ atomic_inc(&rrdev->nr_pending);
+ }
}
rcu_read_unlock();

@@ -1096,11 +1172,23 @@ retry_write:
int j;
int d;

- for (j = 0; j < i; j++)
+ for (j = 0; j < i; j++) {
if (r10_bio->devs[j].bio) {
d = r10_bio->devs[j].devnum;
rdev_dec_pending(conf->mirrors[d].rdev, mddev);
}
+ if (r10_bio->devs[j].repl_bio) {
+ struct md_rdev *rdev;
+ d = r10_bio->devs[j].devnum;
+ rdev = conf->mirrors[d].replacement;
+ if (!rdev) {
+ /* Race with remove_disk */
+ smp_mb();
+ rdev = conf->mirrors[d].rdev;
+ }
+ rdev_dec_pending(rdev, mddev);
+ }
+ }
allow_barrier(conf);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
wait_barrier(conf);
@@ -1147,6 +1235,31 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+
+ if (!r10_bio->devs[i].repl_bio)
+ continue;
+
+ mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+ max_sectors);
+ r10_bio->devs[i].repl_bio = mbio;
+
+ /* We are actively writing to the original device
+ * so it cannot disappear, so the replacement cannot
+ * become NULL here
+ */
+ mbio->bi_sector = (r10_bio->devs[i].addr+
+ conf->mirrors[d].replacement->data_offset);
+ mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
+ mbio->bi_end_io = raid10_end_write_request;
+ mbio->bi_rw = WRITE | do_sync | do_fua;
+ mbio->bi_private = r10_bio;
+
+ atomic_inc(&r10_bio->remaining);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}

/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1309,9 +1422,27 @@ static int raid10_spare_active(struct mddev *mddev)
*/
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i;
- if (tmp->rdev
- && !test_bit(Faulty, &tmp->rdev->flags)
- && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+ if (tmp->replacement
+ && tmp->replacement->recovery_offset == MaxSector
+ && !test_bit(Faulty, &tmp->replacement->flags)
+ && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+ /* Replacement has just become active */
+ if (!tmp->rdev
+ || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+ count++;
+ if (tmp->rdev) {
+ /* Replaced device not technically faulty,
+ * but we need to be sure it gets removed
+ * and never re-added.
+ */
+ set_bit(Faulty, &tmp->rdev->flags);
+ sysfs_notify_dirent_safe(
+ tmp->rdev->sysfs_state);
+ }
+ sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
+ } else if (tmp->rdev
+ && !test_bit(Faulty, &tmp->rdev->flags)
+ && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++;
sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
@@ -1353,8 +1484,25 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
struct mirror_info *p = &conf->mirrors[mirror];
if (p->recovery_disabled == mddev->recovery_disabled)
continue;
- if (p->rdev)
- continue;
+ if (p->rdev) {
+ if (!test_bit(WantReplacement, &p->rdev->flags) ||
+ p->replacement != NULL)
+ continue;
+ clear_bit(In_sync, &rdev->flags);
+ set_bit(Replacement, &rdev->flags);
+ rdev->raid_disk = mirror;
+ err = 0;
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
+ if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+ blk_queue_max_segments(mddev->queue, 1);
+ blk_queue_segment_boundary(mddev->queue,
+ PAGE_CACHE_SIZE - 1);
+ }
+ conf->fullsync = 1;
+ rcu_assign_pointer(p->replacement, rdev);
+ break;
+ }

disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
@@ -1385,40 +1533,61 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
return err;
}

-static int raid10_remove_disk(struct mddev *mddev, int number)
+static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
{
struct r10conf *conf = mddev->private;
int err = 0;
- struct md_rdev *rdev;
- struct mirror_info *p = conf->mirrors+ number;
+ int number = rdev->raid_disk;
+ struct md_rdev **rdevp;
+ struct mirror_info *p = conf->mirrors + number;

print_conf(conf);
- rdev = p->rdev;
- if (rdev) {
- if (test_bit(In_sync, &rdev->flags) ||
- atomic_read(&rdev->nr_pending)) {
- err = -EBUSY;
- goto abort;
- }
- /* Only remove faulty devices in recovery
- * is not possible.
- */
- if (!test_bit(Faulty, &rdev->flags) &&
- mddev->recovery_disabled != p->recovery_disabled &&
- enough(conf, -1)) {
- err = -EBUSY;
- goto abort;
- }
- p->rdev = NULL;
- synchronize_rcu();
- if (atomic_read(&rdev->nr_pending)) {
- /* lost the race, try later */
- err = -EBUSY;
- p->rdev = rdev;
- goto abort;
- }
- err = md_integrity_register(mddev);
+ if (rdev == p->rdev)
+ rdevp = &p->rdev;
+ else if (rdev == p->replacement)
+ rdevp = &p->replacement;
+ else
+ return 0;
+
+ if (test_bit(In_sync, &rdev->flags) ||
+ atomic_read(&rdev->nr_pending)) {
+ err = -EBUSY;
+ goto abort;
}
+ /* Only remove faulty devices if recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->recovery_disabled != p->recovery_disabled &&
+ (!p->replacement || p->replacement == rdev) &&
+ enough(conf, -1)) {
+ err = -EBUSY;
+ goto abort;
+ }
+ *rdevp = NULL;
+ synchronize_rcu();
+ if (atomic_read(&rdev->nr_pending)) {
+ /* lost the race, try later */
+ err = -EBUSY;
+ *rdevp = rdev;
+ goto abort;
+ } else if (p->replacement) {
+ /* We must have just cleared 'rdev' */
+ p->rdev = p->replacement;
+ clear_bit(Replacement, &p->replacement->flags);
+ smp_mb(); /* Make sure other CPUs may see both as identical
+ * but will never see neither -- if they are careful.
+ */
+ p->replacement = NULL;
+ clear_bit(WantReplacement, &rdev->flags);
+ } else
+ /* We might have just removed the Replacement as faulty
+ * Clear the flag just in case
+ */
+ clear_bit(WantReplacement, &rdev->flags);
+
+ err = md_integrity_register(mddev);
+
abort:

print_conf(conf);
@@ -1432,7 +1601,7 @@ static void end_sync_read(struct bio *bio, int error)
struct r10conf *conf = r10_bio->mddev->private;
int d;

- d = find_bio_disk(conf, r10_bio, bio, NULL);
+ d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);

if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
@@ -1493,19 +1662,34 @@ static void end_sync_write(struct bio *bio, int error)
sector_t first_bad;
int bad_sectors;
int slot;
-
- d = find_bio_disk(conf, r10_bio, bio, &slot);
+ int repl;
+ struct md_rdev *rdev = NULL;
+
+ d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
+ if (repl)
+ rdev = conf->mirrors[d].replacement;
+ if (!rdev) {
+ smp_mb();
+ rdev = conf->mirrors[d].rdev;
+ }

if (!uptodate) {
- set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
- set_bit(R10BIO_WriteError, &r10_bio->state);
- } else if (is_badblock(conf->mirrors[d].rdev,
+ if (repl)
+ md_error(mddev, rdev);
+ else {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ if (!test_and_set_bit(WantReplacement, &rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED,
+ &rdev->mddev->recovery);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ }
+ } else if (is_badblock(rdev,
r10_bio->devs[slot].addr,
r10_bio->sectors,
&first_bad, &bad_sectors))
set_bit(R10BIO_MadeGood, &r10_bio->state);

- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ rdev_dec_pending(rdev, mddev);

end_sync_request(r10_bio);
}
@@ -1609,6 +1793,29 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
generic_make_request(tbio);
}

+ /* Now write out to any replacement devices
+ * that are active
+ */
+ for (i = 0; i < conf->copies; i++) {
+ int j, d;
+ int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
+
+ tbio = r10_bio->devs[i].repl_bio;
+ if (!tbio || !tbio->bi_end_io)
+ continue;
+ if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
+ && r10_bio->devs[i].bio != fbio)
+ for (j = 0; j < vcnt; j++)
+ memcpy(page_address(tbio->bi_io_vec[j].bv_page),
+ page_address(fbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE);
+ d = r10_bio->devs[i].devnum;
+ atomic_inc(&r10_bio->remaining);
+ md_sync_acct(conf->mirrors[d].replacement->bdev,
+ tbio->bi_size >> 9);
+ generic_make_request(tbio);
+ }
+
done:
if (atomic_dec_and_test(&r10_bio->remaining)) {
md_done_sync(mddev, r10_bio->sectors, 1);
@@ -1668,8 +1875,13 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
s << 9,
bio->bi_io_vec[idx].bv_page,
WRITE, false);
- if (!ok)
+ if (!ok) {
set_bit(WriteErrorSeen, &rdev->flags);
+ if (!test_and_set_bit(WantReplacement,
+ &rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED,
+ &rdev->mddev->recovery);
+ }
}
if (!ok) {
/* We don't worry if we cannot set a bad block -
@@ -1709,7 +1921,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
int d;
- struct bio *wbio;
+ struct bio *wbio, *wbio2;

if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
fix_recovery_read_error(r10_bio);
@@ -1721,12 +1933,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* share the pages with the first bio
* and submit the write request
*/
- wbio = r10_bio->devs[1].bio;
d = r10_bio->devs[1].devnum;
-
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
- generic_make_request(wbio);
+ wbio = r10_bio->devs[1].bio;
+ wbio2 = r10_bio->devs[1].repl_bio;
+ if (wbio->bi_end_io) {
+ atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
+ generic_make_request(wbio);
+ }
+ if (wbio2 && wbio2->bi_end_io) {
+ atomic_inc(&conf->mirrors[d].replacement->nr_pending);
+ md_sync_acct(conf->mirrors[d].replacement->bdev,
+ wbio2->bi_size >> 9);
+ generic_make_request(wbio2);
+ }
}


|
|
|
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
|
|
|
/* success */
|
|
|
return 1;
|
|
|
- if (rw == WRITE)
|
|
|
+ if (rw == WRITE) {
|
|
|
set_bit(WriteErrorSeen, &rdev->flags);
|
|
|
+ if (!test_and_set_bit(WantReplacement, &rdev->flags))
|
|
|
+ set_bit(MD_RECOVERY_NEEDED,
|
|
|
+ &rdev->mddev->recovery);
|
|
|
+ }
|
|
|
/* need to record an error - either for the block or the device */
|
|
|
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
|
|
md_error(rdev->mddev, rdev);
|
|
@@ -2060,10 +2284,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
|
|
|
static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
|
|
{
|
|
|
int slot = r10_bio->read_slot;
|
|
|
- int mirror = r10_bio->devs[slot].devnum;
|
|
|
struct bio *bio;
|
|
|
struct r10conf *conf = mddev->private;
|
|
|
- struct md_rdev *rdev;
|
|
|
+ struct md_rdev *rdev = r10_bio->devs[slot].rdev;
|
|
|
char b[BDEVNAME_SIZE];
|
|
|
unsigned long do_sync;
|
|
|
int max_sectors;
|
|
@@ -2081,15 +2304,15 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
|
|
fix_read_error(conf, mddev, r10_bio);
|
|
|
unfreeze_array(conf);
|
|
|
}
|
|
|
- rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
|
|
|
+ rdev_dec_pending(rdev, mddev);
|
|
|
|
|
|
bio = r10_bio->devs[slot].bio;
|
|
|
bdevname(bio->bi_bdev, b);
|
|
|
r10_bio->devs[slot].bio =
|
|
|
mddev->ro ? IO_BLOCKED : NULL;
|
|
|
read_more:
|
|
|
- mirror = read_balance(conf, r10_bio, &max_sectors);
|
|
|
- if (mirror == -1) {
|
|
|
+ rdev = read_balance(conf, r10_bio, &max_sectors);
|
|
|
+ if (rdev == NULL) {
|
|
|
printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
|
|
|
" read error for block %llu\n",
|
|
|
mdname(mddev), b,
|
|
@@ -2103,7 +2326,6 @@ read_more:
|
|
|
if (bio)
|
|
|
bio_put(bio);
|
|
|
slot = r10_bio->read_slot;
|
|
|
- rdev = conf->mirrors[mirror].rdev;
|
|
|
printk_ratelimited(
|
|
|
KERN_ERR
|
|
|
"md/raid10:%s: %s: redirecting"
|
|
@@ -2117,6 +2339,7 @@ read_more:
|
|
|
r10_bio->sector - bio->bi_sector,
|
|
|
max_sectors);
|
|
|
r10_bio->devs[slot].bio = bio;
|
|
|
+ r10_bio->devs[slot].rdev = rdev;
|
|
|
bio->bi_sector = r10_bio->devs[slot].addr
|
|
|
+ rdev->data_offset;
|
|
|
bio->bi_bdev = rdev->bdev;
|
|
@@ -2187,6 +2410,22 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->sectors, 0))
md_error(conf->mddev, rdev);
}
+ rdev = conf->mirrors[dev].replacement;
+ if (r10_bio->devs[m].repl_bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
+ &r10_bio->devs[m].repl_bio->bi_flags)) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
+ }
}
put_buf(r10_bio);
} else {
@@ -2209,6 +2448,15 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
}
rdev_dec_pending(rdev, conf->mddev);
}
+ bio = r10_bio->devs[m].repl_bio;
+ rdev = conf->mirrors[dev].replacement;
+ if (rdev && bio == IO_MADE_GOOD) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
if (test_bit(R10BIO_WriteError,
&r10_bio->state))
@@ -2272,9 +2520,14 @@ static void raid10d(struct mddev *mddev)
static int init_resync(struct r10conf *conf)
{
int buffs;
+ int i;

buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
BUG_ON(conf->r10buf_pool);
+ conf->have_replacement = 0;
+ for (i = 0; i < conf->raid_disks; i++)
+ if (conf->mirrors[i].replacement)
+ conf->have_replacement = 1;
conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
if (!conf->r10buf_pool)
return -ENOMEM;
@@ -2355,9 +2608,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bitmap_end_sync(mddev->bitmap, sect,
&sync_blocks, 1);
}
- } else /* completed sync */
+ } else {
+ /* completed sync */
+ if ((!mddev->bitmap || conf->fullsync)
+ && conf->have_replacement
+ && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+ /* Completed a full sync so the replacements
+ * are now fully recovered.
+ */
+ for (i = 0; i < conf->raid_disks; i++)
+ if (conf->mirrors[i].replacement)
+ conf->mirrors[i].replacement
+ ->recovery_offset
+ = MaxSector;
+ }
conf->fullsync = 0;
-
+ }
bitmap_close_sync(mddev->bitmap);
close_sync(conf);
*skipped = 1;
@@ -2414,23 +2680,30 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t sect;
int must_sync;
int any_working;
-
- if (conf->mirrors[i].rdev == NULL ||
- test_bit(In_sync, &conf->mirrors[i].rdev->flags))
+ struct mirror_info *mirror = &conf->mirrors[i];
+
+ if ((mirror->rdev == NULL ||
+ test_bit(In_sync, &mirror->rdev->flags))
+ &&
+ (mirror->replacement == NULL ||
+ test_bit(Faulty,
+ &mirror->replacement->flags)))
continue;

still_degraded = 0;
/* want to reconstruct this device */
rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i);
- /* Unless we are doing a full sync, we only need
- * to recover the block if it is set in the bitmap
+ /* Unless we are doing a full sync, or a replacement
+ * we only need to recover the block if it is set in
+ * the bitmap
*/
must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, 1);
if (sync_blocks < max_sync)
max_sync = sync_blocks;
if (!must_sync &&
+ mirror->replacement == NULL &&
!conf->fullsync) {
/* yep, skip the sync_blocks here, but don't assume
* that there will never be anything to do here
@@ -2500,33 +2773,60 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
from_addr = r10_bio->devs[j].addr;
- bio->bi_sector = from_addr +
- conf->mirrors[d].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[d].rdev->bdev;
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- atomic_inc(&r10_bio->remaining);
- /* and we write to 'i' */
+ bio->bi_sector = from_addr + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ atomic_inc(&rdev->nr_pending);
+ /* and we write to 'i' (if not in_sync) */

for (k=0; k<conf->copies; k++)
if (r10_bio->devs[k].devnum == i)
break;
BUG_ON(k == conf->copies);
- bio = r10_bio->devs[1].bio;
- bio->bi_next = biolist;
- biolist = bio;
- bio->bi_private = r10_bio;
- bio->bi_end_io = end_sync_write;
- bio->bi_rw = WRITE;
to_addr = r10_bio->devs[k].addr;
- bio->bi_sector = to_addr +
- conf->mirrors[i].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-
r10_bio->devs[0].devnum = d;
r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
r10_bio->devs[1].addr = to_addr;

+ rdev = mirror->rdev;
+ if (!test_bit(In_sync, &rdev->flags)) {
+ bio = r10_bio->devs[1].bio;
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_write;
+ bio->bi_rw = WRITE;
+ bio->bi_sector = to_addr
+ + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ atomic_inc(&r10_bio->remaining);
+ } else
+ r10_bio->devs[1].bio->bi_end_io = NULL;
+
+ /* and maybe write to replacement */
+ bio = r10_bio->devs[1].repl_bio;
+ if (bio)
+ bio->bi_end_io = NULL;
+ rdev = mirror->replacement;
+ /* Note: if rdev != NULL, then bio
+ * cannot be NULL as r10buf_pool_alloc will
+ * have allocated it.
+ * So the second test here is pointless.
+ * But it keeps semantic-checkers happy, and
+ * this comment keeps human reviewers
+ * happy.
+ */
+ if (rdev == NULL || bio == NULL ||
+ test_bit(Faulty, &rdev->flags))
+ break;
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_write;
+ bio->bi_rw = WRITE;
+ bio->bi_sector = to_addr + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ atomic_inc(&r10_bio->remaining);
break;
}
if (j == conf->copies) {
@@ -2544,8 +2844,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
for (k = 0; k < conf->copies; k++)
if (r10_bio->devs[k].devnum == i)
break;
- if (!rdev_set_badblocks(
- conf->mirrors[i].rdev,
+ if (!test_bit(In_sync,
+ &mirror->rdev->flags)
+ && !rdev_set_badblocks(
+ mirror->rdev,
+ r10_bio->devs[k].addr,
+ max_sync, 0))
+ any_working = 0;
+ if (mirror->replacement &&
+ !rdev_set_badblocks(
+ mirror->replacement,
r10_bio->devs[k].addr,
max_sync, 0))
any_working = 0;
@@ -2556,7 +2864,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
printk(KERN_INFO "md/raid10:%s: insufficient "
"working devices for recovery.\n",
mdname(mddev));
- conf->mirrors[i].recovery_disabled
+ mirror->recovery_disabled
= mddev->recovery_disabled;
}
break;
@@ -2605,6 +2913,9 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t first_bad, sector;
int bad_sectors;

+ if (r10_bio->devs[i].repl_bio)
+ r10_bio->devs[i].repl_bio->bi_end_io = NULL;
+
bio = r10_bio->devs[i].bio;
bio->bi_end_io = NULL;
clear_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -2635,6 +2946,27 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
count++;
+
+ if (conf->mirrors[d].replacement == NULL ||
+ test_bit(Faulty,
+ &conf->mirrors[d].replacement->flags))
+ continue;
+
+ /* Need to set up for writing to the replacement */
+ bio = r10_bio->devs[i].repl_bio;
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+ sector = r10_bio->devs[i].addr;
+ atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_write;
+ bio->bi_rw = WRITE;
+ bio->bi_sector = sector +
+ conf->mirrors[d].replacement->data_offset;
+ bio->bi_bdev = conf->mirrors[d].replacement->bdev;
+ count++;
}

if (count < 2) {
@@ -2643,6 +2975,11 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
if (r10_bio->devs[i].bio->bi_end_io)
rdev_dec_pending(conf->mirrors[d].rdev,
mddev);
+ if (r10_bio->devs[i].repl_bio &&
+ r10_bio->devs[i].repl_bio->bi_end_io)
+ rdev_dec_pending(
+ conf->mirrors[d].replacement,
+ mddev);
}
put_buf(r10_bio);
biolist = NULL;
@@ -2896,6 +3233,15 @@ static int run(struct mddev *mddev)
continue;
disk = conf->mirrors + disk_idx;

+ if (test_bit(Replacement, &rdev->flags)) {
+ if (disk->replacement)
+ goto out_free_conf;
+ disk->replacement = rdev;
+ } else {
+ if (disk->rdev)
+ goto out_free_conf;
+ disk->rdev = rdev;
+ }
+
- disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
@@ -2923,6 +3269,13 @@ static int run(struct mddev *mddev)

disk = conf->mirrors + i;

+ if (!disk->rdev && disk->replacement) {
+ /* The replacement is all we have - use it */
+ disk->rdev = disk->replacement;
+ disk->replacement = NULL;
+ clear_bit(Replacement, &disk->rdev->flags);
+ }
+
if (!disk->rdev ||
!test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
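Illustrative sketch (not part of the patch): the series gives each mirror slot an optional replacement device; writes are cloned to both the original rdev and its replacement, and reads prefer the replacement once it has recovered past the requested sector. The toy types below (toy_rdev, toy_mirror) are invented for this sketch only and do not exist in md; they model that rule in plain userspace C under those assumptions.

/*
 * Userspace model only -- not md code.  Mimics the rule raid10 now
 * follows: writes go to the primary device and, if present, to its
 * replacement; a read prefers the replacement once it has recovered
 * past the requested sector.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_rdev {
	const char *name;
	bool faulty;
	unsigned long long recovery_offset;	/* sectors recovered so far */
};

struct toy_mirror {
	struct toy_rdev *rdev;		/* active device */
	struct toy_rdev *replacement;	/* being rebuilt, may be NULL */
};

/* Pick the device to read 'sector' from, as read_balance() now prefers. */
static struct toy_rdev *toy_read_dev(struct toy_mirror *m,
				     unsigned long long sector)
{
	struct toy_rdev *r = m->replacement;

	if (r == NULL || r->faulty || sector >= r->recovery_offset)
		r = m->rdev;		/* fall back to the original */
	if (r == NULL || r->faulty)
		return NULL;
	return r;
}

/* Issue a write to both devices, as the make_request() changes do. */
static void toy_write(struct toy_mirror *m, unsigned long long sector)
{
	if (m->rdev && !m->rdev->faulty)
		printf("write sector %llu -> %s\n", sector, m->rdev->name);
	if (m->replacement && !m->replacement->faulty)
		printf("write sector %llu -> %s\n", sector,
		       m->replacement->name);
}

int main(void)
{
	struct toy_rdev a = { "sda", false, ~0ULL };
	struct toy_rdev b = { "sdb", false, 1024 };	/* rebuilt up to 1024 */
	struct toy_mirror m = { &a, &b };

	toy_write(&m, 2048);				/* hits both devices */
	printf("read sector 100 from %s\n",
	       toy_read_dev(&m, 100)->name);		/* replacement */
	printf("read sector 4096 from %s\n",
	       toy_read_dev(&m, 4096)->name);		/* original */
	return 0;
}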