@@ -1087,23 +1087,122 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position. Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
+	}
+	slot = r10_bio->read_slot;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
+
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	if (test_bit(FailFast, &rdev->flags) &&
+	    test_bit(R10BIO_FailFast, &r10_bio->state))
+		read_bio->bi_opf |= MD_FAILFAST;
+	read_bio->bi_private = r10_bio;
+
+	if (mddev->gendisk)
+		trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+				      read_bio, disk_devt(mddev->gendisk),
+				      r10_bio->sector);
+	if (max_sectors < r10_bio->sectors) {
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
+		 */
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it. so hand bio over
+		 * to raid10d.
+		 */
+		reschedule_retry(r10_bio);
+
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
+
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
 	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
 
 	md_write_start(mddev, bio);
 
@@ -1118,8 +1217,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position. Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position. Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@@ -1129,8 +1229,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
 		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
@@ -1148,98 +1248,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 		conf->reshape_safe = mddev->reshape_position;
 	}
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes. If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		if (test_bit(FailFast, &rdev->flags) &&
-		    test_bit(R10BIO_FailFast, &r10_bio->state))
-			read_bio->bi_opf |= MD_FAILFAST;
-		read_bio->bi_private = r10_bio;
-
-		if (mddev->gendisk)
-			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
-					      read_bio, disk_devt(mddev->gendisk),
-					      r10_bio->sector);
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it. so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
-
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io = raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io = raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments. If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes. If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;