瀏覽代碼

md/raid10: abort delayed writes when device fails.

When writing to an array with a bitmap enabled, the writes are grouped
in batches which are preceded by an update to the bitmap.

It is quite likely that, if a drive develops a problem which is not
media related, the bitmap write will be the first to report an
error and cause the device to be marked faulty (as the bitmap write is
at the start of a batch).

In this case, there is no point submitting the subsequent writes to the
failed device - that just wastes time.

So re-check the Faulty state of a device before submitting a
delayed write.

This requires that we keep the 'rdev', rather than the 'bdev' in the
bio, then swap in the bdev just before final submission.

Reported-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
NeilBrown 8 年之前
父節點
當前提交
a9ae93c8cc
共有 1 個文件被更改,包括 16 次插入6 次删除
  1. 16 6
      drivers/md/raid10.c

+ 16 - 6
drivers/md/raid10.c

@@ -858,9 +858,14 @@ static void flush_pending_writes(struct r10conf *conf)
 
 
 		while (bio) { /* submit pending writes */
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			struct bio *next = bio->bi_next;
+			struct md_rdev *rdev = (void*)bio->bi_bdev;
 			bio->bi_next = NULL;
 			bio->bi_next = NULL;
-			if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+			bio->bi_bdev = rdev->bdev;
+			if (test_bit(Faulty, &rdev->flags)) {
+				bio->bi_error = -EIO;
+				bio_endio(bio);
+			} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
+					    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
 				/* Just ignore it */
 				/* Just ignore it */
 				bio_endio(bio);
 				bio_endio(bio);
 			else
 			else
@@ -1036,9 +1041,14 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 
 
 	while (bio) { /* submit pending writes */
 	while (bio) { /* submit pending writes */
 		struct bio *next = bio->bi_next;
 		struct bio *next = bio->bi_next;
+		struct md_rdev *rdev = (void*)bio->bi_bdev;
 		bio->bi_next = NULL;
 		bio->bi_next = NULL;
-		if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
-		    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+		bio->bi_bdev = rdev->bdev;
+		if (test_bit(Faulty, &rdev->flags)) {
+			bio->bi_error = -EIO;
+			bio_endio(bio);
+		} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
+				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
 			/* Just ignore it */
 			/* Just ignore it */
 			bio_endio(bio);
 			bio_endio(bio);
 		else
 		else
@@ -1357,7 +1367,7 @@ retry_write:
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
 					   choose_data_offset(r10_bio,
 					   choose_data_offset(r10_bio,
 							      rdev));
 							      rdev));
-			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_bdev = (void*)rdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
 			mbio->bi_private = r10_bio;
 			mbio->bi_private = r10_bio;
@@ -1399,7 +1409,7 @@ retry_write:
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
 					   choose_data_offset(
 					   choose_data_offset(
 						   r10_bio, rdev));
 						   r10_bio, rdev));
-			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_bdev = (void*)rdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
 			mbio->bi_private = r10_bio;
 			mbio->bi_private = r10_bio;