@@ -1202,7 +1202,8 @@ alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled)
 	return r1_bio;
 }
 
-static void raid1_read_request(struct mddev *mddev, struct bio *bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+			       int max_read_sectors)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
@@ -1211,7 +1212,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio)
 	struct bitmap *bitmap = mddev->bitmap;
 	const int op = bio_op(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	int sectors_handled;
 	int max_sectors;
 	int rdisk;
 
@@ -1222,12 +1222,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio)
 	wait_read_barrier(conf, bio->bi_iter.bi_sector);
 
 	r1_bio = alloc_r1bio(mddev, bio, 0);
+	r1_bio->sectors = max_read_sectors;
 
 	/*
 	 * make_request() can abort the operation when read-ahead is being
 	 * used and no empty request is available.
 	 */
-read_again:
 	rdisk = read_balance(conf, r1_bio, &max_sectors);
 
 	if (rdisk < 0) {
@@ -1247,11 +1247,20 @@ read_again:
 			wait_event(bitmap->behind_wait,
 				   atomic_read(&bitmap->behind_writes) == 0);
 	}
+
+	if (max_sectors < bio_sectors(bio)) {
+		struct bio *split = bio_split(bio, max_sectors,
+					      GFP_NOIO, conf->bio_split);
+		bio_chain(split, bio);
+		generic_make_request(bio);
+		bio = split;
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = max_sectors;
+	}
+
 	r1_bio->read_disk = rdisk;
 
 	read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
-	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-		 max_sectors);
 
 	r1_bio->bios[rdisk] = read_bio;
 
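The hunk above is the heart of the rework: if read_balance() can serve only a prefix of the bio, bio_split() carves that prefix off, bio_chain() makes the remainder's completion wait on the prefix as well, and the remainder goes straight back to generic_make_request() while the handler carries on with just the split part. This is safe because a chained parent's end_io cannot run until every chained child has also completed. Below is a minimal, runnable user-space model of that completion rule; the types and names (fake_bio, fake_chain, fake_endio) are illustrative, not kernel API, and only mirror the ordering semantics of bio_chain()/bio_endio().

#include <stdatomic.h>
#include <stdio.h>

/* models bio->__bi_remaining and the bio_chain() parent link */
struct fake_bio {
	atomic_int remaining;
	struct fake_bio *parent;
	const char *name;
};

static void fake_endio(struct fake_bio *b)
{
	/* models bio_endio(): only the final completion reaches end_io */
	if (atomic_fetch_sub(&b->remaining, 1) == 1) {
		printf("%s: end_io runs\n", b->name);
		if (b->parent)
			fake_endio(b->parent);
	}
}

static void fake_chain(struct fake_bio *child, struct fake_bio *parent)
{
	/* models bio_chain(): parent now waits for one more completion */
	atomic_fetch_add(&parent->remaining, 1);
	child->parent = parent;
}

int main(void)
{
	struct fake_bio remainder = { 1, NULL, "remainder" };
	struct fake_bio split = { 1, NULL, "split" };

	fake_chain(&split, &remainder);
	fake_endio(&remainder);	/* submitted first, but end_io held back */
	fake_endio(&split);	/* split completes, releasing the remainder */
	return 0;
}

Even though the remainder is submitted (and may finish) first, its end_io only runs once the split part completes, so the original bio's completion semantics are preserved without the old read_again retry loop.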
@@ -1270,30 +1279,11 @@ read_again:
 			      read_bio, disk_devt(mddev->gendisk),
 			      r1_bio->sector);
 
-	if (max_sectors < r1_bio->sectors) {
-		/*
-		 * could not read all from this device, so we will need another
-		 * r1_bio.
-		 */
-		sectors_handled = (r1_bio->sector + max_sectors
-				   - bio->bi_iter.bi_sector);
-		r1_bio->sectors = max_sectors;
-		bio_inc_remaining(bio);
-
-		/*
-		 * Cannot call generic_make_request directly as that will be
-		 * queued in __make_request and subsequent mempool_alloc might
-		 * block waiting for it. So hand bio over to raid1d.
-		 */
-		reschedule_retry(r1_bio);
-
-		r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
-		goto read_again;
-	} else
-		generic_make_request(read_bio);
+	generic_make_request(read_bio);
 }
 
-static void raid1_write_request(struct mddev *mddev, struct bio *bio)
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				int max_write_sectors)
 {
 	struct r1conf *conf = mddev->private;
 	struct r1bio *r1_bio;
@@ -1304,9 +1294,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 	struct blk_plug_cb *cb;
 	struct raid1_plug_cb *plug = NULL;
 	int first_clone;
-	int sectors_handled;
 	int max_sectors;
-	sector_t offset;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -1345,6 +1333,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 	wait_barrier(conf, bio->bi_iter.bi_sector);
 
 	r1_bio = alloc_r1bio(mddev, bio, 0);
+	r1_bio->sectors = max_write_sectors;
 
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
@@ -1443,17 +1432,21 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		goto retry_write;
 	}
 
-	if (max_sectors < r1_bio->sectors)
+	if (max_sectors < bio_sectors(bio)) {
+		struct bio *split = bio_split(bio, max_sectors,
+					      GFP_NOIO, conf->bio_split);
+		bio_chain(split, bio);
+		generic_make_request(bio);
+		bio = split;
+		r1_bio->master_bio = bio;
 		r1_bio->sectors = max_sectors;
-
-	sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector;
+	}
 
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
 	first_clone = 1;
 
-	offset = r1_bio->sector - bio->bi_iter.bi_sector;
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio = NULL;
 		if (!r1_bio->bios[i])
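The write path adopts the identical split-and-chain pattern, and that is what lets the offset bookkeeping disappear: once the bio handed to the clone loop already starts at r1_bio->sector, every clone covers the right range by construction. As the next two hunks show, alloc_behind_master_bio() is therefore called with offset 0 and the bio_trim() on the regular clone is dropped.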
@@ -1470,7 +1463,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		     < mddev->bitmap_info.max_write_behind) &&
 		    !waitqueue_active(&bitmap->behind_wait)) {
 			mbio = alloc_behind_master_bio(r1_bio, bio,
-						       offset << 9,
+						       0,
 						       max_sectors << 9);
 		}
 
@@ -1486,10 +1479,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 			mbio = bio_clone_fast(r1_bio->behind_master_bio,
 					      GFP_NOIO,
 					      mddev->bio_set);
-		else {
+		else
 			mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
-			bio_trim(mbio, offset, max_sectors);
-		}
 	}
 
 	if (r1_bio->behind_master_bio) {
@@ -1536,19 +1527,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		if (!plug)
 			md_wakeup_thread(mddev->thread);
 	}
-	/* Mustn't call r1_bio_write_done before this next test,
-	 * as it could result in the bio being freed.
-	 */
-	if (sectors_handled < bio_sectors(bio)) {
-		/* We need another r1_bio, which must be counted */
-		sector_t sect = bio->bi_iter.bi_sector + sectors_handled;
-
-		inc_pending(conf, sect);
-		bio_inc_remaining(bio);
-		r1_bio_write_done(r1_bio);
-		r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
-		goto retry_write;
-	}
 
 	r1_bio_write_done(r1_bio);
 
@@ -1558,7 +1536,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 
 static void raid1_make_request(struct mddev *mddev, struct bio *bio)
 {
-	struct bio *split;
 	sector_t sectors;
 
 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
@@ -1566,43 +1543,20 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
-	/* if bio exceeds barrier unit boundary, split it */
-	do {
-		sectors = align_to_barrier_unit_end(
-				bio->bi_iter.bi_sector, bio_sectors(bio));
-		if (sectors < bio_sectors(bio)) {
-			split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
-			bio_chain(split, bio);
-		} else {
-			split = bio;
-		}
-
-		if (bio_data_dir(split) == READ) {
-			raid1_read_request(mddev, split);
+	/*
+	 * There is a limit to the maximum size, but
+	 * the read/write handler might find a lower limit
+	 * due to bad blocks. To avoid multiple splits,
+	 * we pass the maximum number of sectors down
+	 * and let the lower level perform the split.
+	 */
+	sectors = align_to_barrier_unit_end(
+			bio->bi_iter.bi_sector, bio_sectors(bio));
 
-			/*
-			 * If a bio is splitted, the first part of bio will
-			 * pass barrier but the bio is queued in
-			 * current->bio_list (see generic_make_request). If
-			 * there is a raise_barrier() called here, the second
-			 * part of bio can't pass barrier. But since the first
-			 * part bio isn't dispatched to underlaying disks yet,
-			 * the barrier is never released, hence raise_barrier
-			 * will alays wait. We have a deadlock.
-			 * Note, this only happens in read path. For write
-			 * path, the first part of bio is dispatched in a
-			 * schedule() call (because of blk plug) or offloaded
-			 * to raid1d.
-			 * Quitting from the function immediately can change
-			 * the bio order queued in bio_list and avoid the deadlock.
-			 */
-			if (split != bio) {
-				generic_make_request(bio);
-				break;
-			}
-		} else
-			raid1_write_request(mddev, split);
-	} while (split != bio);
+	if (bio_data_dir(bio) == READ)
+		raid1_read_request(mddev, bio, sectors);
+	else
+		raid1_write_request(mddev, bio, sectors);
 }
 
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
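With both handlers able to split for themselves, raid1_make_request() only computes the one limit the handlers cannot discover on their own: the barrier-unit boundary. The following is a runnable user-space model of the clamping that align_to_barrier_unit_end() performs; BARRIER_UNIT_SECTOR_BITS is 17 (64MiB units) in raid1.h, but treat the constant reproduced here as an assumption.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

#define BARRIER_UNIT_SECTOR_BITS 17
#define BARRIER_UNIT_SECTOR_SIZE (1ULL << BARRIER_UNIT_SECTOR_BITS)

/* clamp a request so it never crosses a barrier-unit boundary */
static sector_t align_to_barrier_unit_end(sector_t start_sector,
					  sector_t sectors)
{
	/* sectors remaining in the unit that start_sector falls in */
	sector_t len = BARRIER_UNIT_SECTOR_SIZE -
		       (start_sector & (BARRIER_UNIT_SECTOR_SIZE - 1));

	return sectors < len ? sectors : len;
}

int main(void)
{
	/* a 1024-sector bio starting 100 sectors short of a boundary
	 * is clamped to 100 sectors; the rest is split off below */
	sector_t start = BARRIER_UNIT_SECTOR_SIZE - 100;

	printf("%llu\n",
	       (unsigned long long)align_to_barrier_unit_end(start, 1024));
	return 0;
}

The clamped value travels down as max_read_sectors/max_write_sectors, so each handler folds any tighter bad-block limit into a single split instead of looping.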
@@ -2647,10 +2601,7 @@ static void raid1d(struct md_thread *thread)
 		else if (test_bit(R1BIO_ReadError, &r1_bio->state))
 			handle_read_error(conf, r1_bio);
 		else
-			/* just a partial read to be scheduled from separate
-			 * context
-			 */
-			generic_make_request(r1_bio->bios[r1_bio->read_disk]);
+			WARN_ON_ONCE(1);
 
 		cond_resched();
 		if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
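The raid1d branch that used to resubmit the second half of a partial read is now unreachable: partial reads are handled entirely by the up-front split, so reaching this path would indicate a logic error, which WARN_ON_ONCE() reports loudly without taking the array down.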
@@ -3038,6 +2989,10 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf->r1bio_pool)
 		goto abort;
 
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	if (!conf->bio_split)
+		goto abort;
+
 	conf->poolinfo->mddev = mddev;
 
 	err = -EINVAL;
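Splits now allocate from a bioset owned by the array rather than from the global fs_bio_set. A stacking driver drawing on the same pool as the layers above it risks stalling under memory pressure, since the forward progress of a split can depend on allocations it is itself holding up; a private conf->bio_split reserve avoids that. The two hunks below free the bioset on both the setup error path and in raid1_free().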
@@ -3119,6 +3074,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		kfree(conf->nr_waiting);
 		kfree(conf->nr_queued);
 		kfree(conf->barrier);
+		if (conf->bio_split)
+			bioset_free(conf->bio_split);
 		kfree(conf);
 	}
 	return ERR_PTR(err);
@@ -3224,6 +3181,8 @@ static void raid1_free(struct mddev *mddev, void *priv)
 	kfree(conf->nr_waiting);
 	kfree(conf->nr_queued);
 	kfree(conf->barrier);
+	if (conf->bio_split)
+		bioset_free(conf->bio_split);
 	kfree(conf);
 }