@@ -223,18 +223,14 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
         return slot;
 }
 
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio_list *return_bi)
 {
-        struct bio *bi = return_bi;
-
-        while (bi) {
-
-                return_bi = bi->bi_next;
-                bi->bi_next = NULL;
+        struct bio *bi;
+
+        while ((bi = bio_list_pop(return_bi)) != NULL) {
                 bi->bi_iter.bi_size = 0;
                 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                          bi, 0);
                 bio_endio(bi);
-                bi = return_bi;
         }
 }
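
The conversion above is easier to read with the bio_list semantics in mind. Below is a minimal userspace sketch of that head/tail list, assuming nothing beyond the C standard library (struct node and struct nlist are invented stand-ins, not kernel types): add appends in O(1) via the tail pointer and pop detaches from the head, which is what lets the new return_io() drop the open-coded bi_next juggling. Unlike the old push-to-head list, it also completes bios in FIFO order.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct node { int id; struct node *next; };
struct nlist { struct node *head, *tail; };   /* cf. struct bio_list */

static void nlist_add(struct nlist *l, struct node *n)
{
        /* append at the tail, as bio_list_add() does */
        n->next = NULL;
        if (l->tail)
                l->tail->next = n;
        else
                l->head = n;
        l->tail = n;
}

static struct node *nlist_pop(struct nlist *l)
{
        /* detach from the head, as bio_list_pop() does */
        struct node *n = l->head;

        if (n) {
                l->head = n->next;
                if (!l->head)
                        l->tail = NULL;
                n->next = NULL;
        }
        return n;
}

int main(void)
{
        struct nlist l = { NULL, NULL };      /* cf. BIO_EMPTY_LIST */
        struct node a = { 1, NULL }, b = { 2, NULL };
        struct node *n;

        nlist_add(&l, &a);
        nlist_add(&l, &b);
        /* same loop shape as the new return_io() */
        while ((n = nlist_pop(&l)) != NULL)
                printf("complete bio %d\n", n->id);
        assert(l.head == NULL && l.tail == NULL);
        return 0;
}
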
@@ -1177,7 +1173,7 @@ async_copy_data(int frombio, struct bio *bio, struct page **page,
 static void ops_complete_biofill(void *stripe_head_ref)
 {
         struct stripe_head *sh = stripe_head_ref;
-        struct bio *return_bi = NULL;
+        struct bio_list return_bi = BIO_EMPTY_LIST;
         int i;
 
         pr_debug("%s: stripe %llu\n", __func__,
@@ -1201,17 +1197,15 @@ static void ops_complete_biofill(void *stripe_head_ref)
                         while (rbi && rbi->bi_iter.bi_sector <
                                 dev->sector + STRIPE_SECTORS) {
                                 rbi2 = r5_next_bio(rbi, dev->sector);
-                                if (!raid5_dec_bi_active_stripes(rbi)) {
-                                        rbi->bi_next = return_bi;
-                                        return_bi = rbi;
-                                }
+                                if (!raid5_dec_bi_active_stripes(rbi))
+                                        bio_list_add(&return_bi, rbi);
                                 rbi = rbi2;
                         }
                 }
         }
         clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
-        return_io(return_bi);
+        return_io(&return_bi);
 
         set_bit(STRIPE_HANDLE, &sh->state);
         release_stripe(sh);
@@ -2517,6 +2511,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
         set_bit(Blocked, &rdev->flags);
         set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        set_bit(MD_CHANGE_PENDING, &mddev->flags);
         printk(KERN_ALERT
                "md/raid:%s: Disk failure on %s, disabling device.\n"
                "md/raid:%s: Operation continuing on %d devices.\n",
@@ -3069,7 +3064,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 static void
 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                 struct stripe_head_state *s, int disks,
-                                struct bio **return_bi)
+                                struct bio_list *return_bi)
 {
         int i;
         BUG_ON(sh->batch_head);
@@ -3114,8 +3109,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                         bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
+                                bio_list_add(return_bi, bi);
                         }
                         bi = nextbi;
                 }
@@ -3139,8 +3133,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                         bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
+                                bio_list_add(return_bi, bi);
                         }
                         bi = bi2;
                 }
@@ -3163,10 +3156,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                         r5_next_bio(bi, sh->dev[i].sector);
 
                         bi->bi_error = -EIO;
-                        if (!raid5_dec_bi_active_stripes(bi)) {
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
-                        }
+                        if (!raid5_dec_bi_active_stripes(bi))
+                                bio_list_add(return_bi, bi);
                         bi = nextbi;
                 }
         }
@@ -3445,7 +3436,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
  * never LOCKED, so we don't need to test 'failed' directly.
  */
 static void handle_stripe_clean_event(struct r5conf *conf,
-        struct stripe_head *sh, int disks, struct bio **return_bi)
+        struct stripe_head *sh, int disks, struct bio_list *return_bi)
 {
         int i;
         struct r5dev *dev;
@@ -3479,8 +3470,7 @@ returnbi:
                                         wbi2 = r5_next_bio(wbi, dev->sector);
                                         if (!raid5_dec_bi_active_stripes(wbi)) {
                                                 md_write_end(conf->mddev);
-                                                wbi->bi_next = *return_bi;
-                                                *return_bi = wbi;
+                                                bio_list_add(return_bi, wbi);
                                         }
                                         wbi = wbi2;
                                 }
@@ -4613,7 +4603,15 @@ finish:
                         md_wakeup_thread(conf->mddev->thread);
         }
 
-        return_io(s.return_bi);
+        if (!bio_list_empty(&s.return_bi)) {
+                if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+                        spin_lock_irq(&conf->device_lock);
+                        bio_list_merge(&conf->return_bi, &s.return_bi);
+                        spin_unlock_irq(&conf->device_lock);
+                        md_wakeup_thread(conf->mddev->thread);
+                } else
+                        return_io(&s.return_bi);
+        }
 
         clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
 }
@@ -4672,12 +4670,12 @@ static int raid5_congested(struct mddev *mddev, int bits)
 
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
+        struct r5conf *conf = mddev->private;
         sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
-        unsigned int chunk_sectors = mddev->chunk_sectors;
+        unsigned int chunk_sectors;
         unsigned int bio_sectors = bio_sectors(bio);
 
-        if (mddev->new_chunk_sectors < mddev->chunk_sectors)
-                chunk_sectors = mddev->new_chunk_sectors;
+        chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
         return chunk_sectors >=
                 ((sector & (chunk_sectors - 1)) + bio_sectors);
 }
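
The test above relies on chunk_sectors being a power of two: sector & (chunk_sectors - 1) is the bio's offset within its chunk, so the bio stays inside one chunk exactly when that offset plus the bio's length fits. Taking min() of the current and previous chunk size makes the answer safe in both geometries while a reshape is in flight. A standalone illustration, with invented values:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

static bool fits_in_chunk(sector_t sector, unsigned int bio_sectors,
                          unsigned int chunk_sectors)
{
        /* chunk_sectors must be a power of two for the mask to work */
        return chunk_sectors >=
                ((sector & (chunk_sectors - 1)) + bio_sectors);
}

int main(void)
{
        /* 64KiB chunks = 128 sectors of 512 bytes */
        printf("%d\n", fits_in_chunk(100, 28, 128));  /* 1: ends exactly at 128 */
        printf("%d\n", fits_in_chunk(100, 29, 128));  /* 0: crosses the boundary */
        return 0;
}
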
@@ -5325,6 +5323,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
         sector_t stripe_addr;
         int reshape_sectors;
         struct list_head stripes;
+        sector_t retn;
 
         if (sector_nr == 0) {
                 /* If restarting in the middle, skip the initial sectors */
@@ -5332,6 +5331,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                     conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                         sector_nr = raid5_size(mddev, 0, 0)
                                 - conf->reshape_progress;
+                } else if (mddev->reshape_backwards &&
+                           conf->reshape_progress == MaxSector) {
+                        /* shouldn't happen, but just in case, finish up.*/
+                        sector_nr = MaxSector;
                 } else if (!mddev->reshape_backwards &&
                            conf->reshape_progress > 0)
                         sector_nr = conf->reshape_progress;
@@ -5340,7 +5343,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                         mddev->curr_resync_completed = sector_nr;
                         sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                         *skipped = 1;
-                        return sector_nr;
+                        retn = sector_nr;
+                        goto finish;
                 }
         }
 
@@ -5348,10 +5352,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
          * If old and new chunk sizes differ, we need to process the
          * largest of these
          */
-        if (mddev->new_chunk_sectors > mddev->chunk_sectors)
-                reshape_sectors = mddev->new_chunk_sectors;
-        else
-                reshape_sectors = mddev->chunk_sectors;
+
+        reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
 
         /* We update the metadata at least every 10 seconds, or when
          * the data about to be copied would over-write the source of
@@ -5366,11 +5368,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
         safepos = conf->reshape_safe;
         sector_div(safepos, data_disks);
         if (mddev->reshape_backwards) {
-                writepos -= min_t(sector_t, reshape_sectors, writepos);
+                BUG_ON(writepos < reshape_sectors);
+                writepos -= reshape_sectors;
                 readpos += reshape_sectors;
                 safepos += reshape_sectors;
         } else {
                 writepos += reshape_sectors;
+                /* readpos and safepos are worst-case calculations.
+                 * A negative number is overly pessimistic, and causes
+                 * obvious problems for unsigned storage. So clip to 0.
+                 */
                 readpos -= min_t(sector_t, reshape_sectors, readpos);
                 safepos -= min_t(sector_t, reshape_sectors, safepos);
         }
@@ -5513,7 +5520,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
          * then we need to write out the superblock.
          */
         sector_nr += reshape_sectors;
-        if ((sector_nr - mddev->curr_resync_completed) * 2
+        retn = reshape_sectors;
+finish:
+        if (mddev->curr_resync_completed > mddev->resync_max ||
+            (sector_nr - mddev->curr_resync_completed) * 2
             >= mddev->resync_max - mddev->curr_resync_completed) {
                 /* Cannot proceed until we've updated the superblock... */
                 wait_event(conf->wait_for_overlap,
@@ -5538,7 +5548,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
         }
 ret:
-        return reshape_sectors;
+        return retn;
 }
 
 static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
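
A note on the arithmetic: reshape_request() now advances in units of the larger of the two chunk sizes. Chunk sizes are powers of two, so the larger is always a whole multiple of the smaller, and each step therefore covers an integral number of stripes in both the old and the new geometry. A toy illustration with invented numbers:

#include <stdio.h>

int main(void)
{
        unsigned int prev_chunk = 128, new_chunk = 512;  /* in sectors */
        unsigned int reshape_sectors =
                prev_chunk > new_chunk ? prev_chunk : new_chunk;

        /* 512 is a multiple of both 128 and 512, so advancing
         * writepos/readpos/safepos by reshape_sectors keeps both
         * layouts chunk-aligned at every step.
         */
        printf("unit=%u old-chunks=%u new-chunks=%u\n",
               reshape_sectors,
               reshape_sectors / prev_chunk,
               reshape_sectors / new_chunk);
        return 0;
}
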
@@ -5794,6 +5804,18 @@ static void raid5d(struct md_thread *thread)
 
         md_check_recovery(mddev);
 
+        if (!bio_list_empty(&conf->return_bi) &&
+            !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+                struct bio_list tmp = BIO_EMPTY_LIST;
+                spin_lock_irq(&conf->device_lock);
+                if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+                        bio_list_merge(&tmp, &conf->return_bi);
+                        bio_list_init(&conf->return_bi);
+                }
+                spin_unlock_irq(&conf->device_lock);
+                return_io(&tmp);
+        }
+
         blk_start_plug(&plug);
         handled = 0;
         spin_lock_irq(&conf->device_lock);
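
This drain pairs with the hunk in handle_stripe() above that parks completions on conf->return_bi while MD_CHANGE_PENDING is set: the flag is tested cheaply without the lock, re-tested under conf->device_lock before the list is stolen, and the bios are completed only after the lock is dropped. Below is a userspace sketch of that shape; pending, lock and parked[] are illustrative stand-ins for MD_CHANGE_PENDING, conf->device_lock and conf->return_bi, not kernel APIs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending = true;        /* cf. MD_CHANGE_PENDING */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int parked[16], nparked;           /* cf. conf->return_bi */

static void park_completion(int id)
{
        pthread_mutex_lock(&lock);
        parked[nparked++] = id;
        pthread_mutex_unlock(&lock);
}

static void daemon_drain(void)
{
        int local[16], n = 0, i;

        if (nparked == 0 || atomic_load(&pending))
                return;                    /* cheap unlocked pre-check */
        pthread_mutex_lock(&lock);
        if (!atomic_load(&pending)) {      /* re-check under the lock */
                for (i = 0; i < nparked; i++)
                        local[n++] = parked[i];
                nparked = 0;
        }
        pthread_mutex_unlock(&lock);
        for (i = 0; i < n; i++)            /* complete outside the lock */
                printf("end bio %d\n", local[i]);
}

int main(void)
{
        park_completion(1);
        park_completion(2);
        daemon_drain();                    /* no-op: still pending */
        atomic_store(&pending, false);     /* superblock now written */
        daemon_drain();                    /* completes 1 and 2, in order */
        return 0;
}
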
@@ -6234,8 +6256,8 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
                 /* size is defined by the smallest of previous and new size */
                 raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
 
-        sectors &= ~((sector_t)mddev->chunk_sectors - 1);
-        sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
+        sectors &= ~((sector_t)conf->chunk_sectors - 1);
+        sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
         return sectors * (raid_disks - conf->max_degraded);
 }
 
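
The two masks round the array size down to a multiple of both chunk sizes; since chunk sizes are powers of two, this amounts to rounding down to the larger of the two. A standalone demonstration with invented values:

#include <stdio.h>

typedef unsigned long long sector_t;

int main(void)
{
        sector_t sectors = 1000003;
        unsigned int chunk = 128, prev_chunk = 512;

        sectors &= ~((sector_t)chunk - 1);      /* 999936 */
        sectors &= ~((sector_t)prev_chunk - 1); /* still 999936 = 1953 * 512 */
        printf("%llu\n", sectors);
        return 0;
}
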
@@ -6453,6 +6475,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         INIT_LIST_HEAD(&conf->hold_list);
         INIT_LIST_HEAD(&conf->delayed_list);
         INIT_LIST_HEAD(&conf->bitmap_list);
+        bio_list_init(&conf->return_bi);
         init_llist_head(&conf->released_stripes);
         atomic_set(&conf->active_stripes, 0);
         atomic_set(&conf->preread_active_stripes, 0);
@@ -6542,6 +6565,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         if (conf->reshape_progress != MaxSector) {
                 conf->prev_chunk_sectors = mddev->chunk_sectors;
                 conf->prev_algo = mddev->layout;
+        } else {
+                conf->prev_chunk_sectors = conf->chunk_sectors;
+                conf->prev_algo = conf->algorithm;
         }
 
         conf->min_nr_stripes = NR_STRIPES;
@@ -6661,6 +6687,8 @@ static int run(struct mddev *mddev)
                 sector_t here_new, here_old;
                 int old_disks;
                 int max_degraded = (mddev->level == 6 ? 2 : 1);
+                int chunk_sectors;
+                int new_data_disks;
 
                 if (mddev->new_level != mddev->level) {
                         printk(KERN_ERR "md/raid:%s: unsupported reshape "
@@ -6672,28 +6700,25 @@ static int run(struct mddev *mddev)
                 /* reshape_position must be on a new-stripe boundary, and one
                  * further up in new geometry must map after here in old
                  * geometry.
+                 * If the chunk sizes are different, then as we perform reshape
+                 * in units of the largest of the two, reshape_position needs
+                 * to be a multiple of the largest chunk size times new data disks.
                  */
                 here_new = mddev->reshape_position;
-                if (sector_div(here_new, mddev->new_chunk_sectors *
-                               (mddev->raid_disks - max_degraded))) {
+                chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
+                new_data_disks = mddev->raid_disks - max_degraded;
+                if (sector_div(here_new, chunk_sectors * new_data_disks)) {
                         printk(KERN_ERR "md/raid:%s: reshape_position not "
                                "on a stripe boundary\n", mdname(mddev));
                         return -EINVAL;
                 }
-                reshape_offset = here_new * mddev->new_chunk_sectors;
+                reshape_offset = here_new * chunk_sectors;
                 /* here_new is the stripe we will write to */
                 here_old = mddev->reshape_position;
-                sector_div(here_old, mddev->chunk_sectors *
-                           (old_disks-max_degraded));
+                sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
                 /* here_old is the first stripe that we might need to read
                  * from */
                 if (mddev->delta_disks == 0) {
-                        if ((here_new * mddev->new_chunk_sectors !=
-                             here_old * mddev->chunk_sectors)) {
-                                printk(KERN_ERR "md/raid:%s: reshape position is"
-                                       " confused - aborting\n", mdname(mddev));
-                                return -EINVAL;
-                        }
                         /* We cannot be sure it is safe to start an in-place
                          * reshape. It is only safe if user-space is monitoring
                          * and taking constant backups.
@@ -6712,10 +6737,10 @@ static int run(struct mddev *mddev)
                                 return -EINVAL;
                         }
                 } else if (mddev->reshape_backwards
-                           ? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
-                              here_old * mddev->chunk_sectors)
-                           : (here_new * mddev->new_chunk_sectors >=
-                              here_old * mddev->chunk_sectors + (-min_offset_diff))) {
+                           ? (here_new * chunk_sectors + min_offset_diff <=
+                              here_old * chunk_sectors)
+                           : (here_new * chunk_sectors >=
+                              here_old * chunk_sectors + (-min_offset_diff))) {
                         /* Reading from the same stripe as writing to - bad */
                         printk(KERN_ERR "md/raid:%s: reshape_position too early for "
                                "auto-recovery - aborting.\n",
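
For reference, the boundary check above leans on sector_div(), which divides in place and returns the remainder, so a non-zero result means reshape_position is not a multiple of (largest chunk size * new data disks). A plain C sketch; the sector_div() here is a userspace stand-in for the kernel macro and the numbers are invented:

#include <stdio.h>

typedef unsigned long long sector_t;

/* userspace stand-in for the kernel's sector_div() */
static unsigned int sector_div(sector_t *n, unsigned int base)
{
        unsigned int rem = (unsigned int)(*n % base);

        *n /= base;
        return rem;
}

int main(void)
{
        sector_t here_new = 1048576;               /* reshape_position */
        unsigned int chunk_sectors = 512, new_data_disks = 4;

        if (sector_div(&here_new, chunk_sectors * new_data_disks))
                printf("not on a stripe boundary\n");
        else
                printf("stripe %llu\n", here_new); /* 1048576 / 2048 = 512 */
        return 0;
}
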
@@ -6967,7 +6992,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
         int i;
 
         seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
-                mddev->chunk_sectors / 2, mddev->layout);
+                conf->chunk_sectors / 2, mddev->layout);
         seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
         for (i = 0; i < conf->raid_disks; i++)
                 seq_printf (seq, "%s",
@@ -7173,7 +7198,9 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
          * worth it.
          */
         sector_t newsize;
-        sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+        struct r5conf *conf = mddev->private;
+
+        sectors &= ~((sector_t)conf->chunk_sectors - 1);
         newsize = raid5_size(mddev, sectors, mddev->raid_disks);
         if (mddev->external_size &&
             mddev->array_sectors > newsize)
@@ -7412,6 +7439,7 @@ static void end_reshape(struct r5conf *conf)
                         rdev->data_offset = rdev->new_data_offset;
                 smp_wmb();
                 conf->reshape_progress = MaxSector;
+                conf->mddev->reshape_position = MaxSector;
                 spin_unlock_irq(&conf->device_lock);
                 wake_up(&conf->wait_for_overlap);
 