8 years ago · 07e8336484
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2897,6 +2897,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
 
				 	return r_sector;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * There are cases where we want handle_stripe_dirtying() and
			
 
				+ * schedule_reconstruction() to delay towrite to some dev of a stripe.
			
 
				+ *
			
 
				+ * This function checks whether we want to delay the towrite. Specifically,
			
 
				+ * we delay the towrite when:
			
 
				+ *
			
 
				+ *   1. degraded stripe has a non-overwrite to the missing dev, AND this
			
 
				+ *      stripe has data in journal (for other devices).
			
 
				+ *
			
 
				+ *      In this case, when reading data for the non-overwrite dev, it is
			
 
				+ *      necessary to handle complex rmw of write back cache (prexor with
			
 
				+ *      orig_page, and xor with page). To keep read path simple, we would
			
 
				+ *      like to flush data in journal to RAID disks first, so complex rmw
			
 
				+ *      is handled in the write path (handle_stripe_dirtying).
			
 
				+ *
			
 
				+ */
			
 
				+static inline bool delay_towrite(struct r5dev *dev,
			
 
				+				   struct stripe_head_state *s)
			
 
				+{
			
 
				+	return !test_bit(R5_OVERWRITE, &dev->flags) &&
			
 
				+		!test_bit(R5_Insync, &dev->flags) && s->injournal;
			
 
				+}
			
 
				+
			
 
				 static void
			
 
				 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
			
 
				 			 int rcw, int expand)
			
@@ -2917,7 +2941,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 
				 		for (i = disks; i--; ) {
			
 
				 			struct r5dev *dev = &sh->dev[i];
			
 
				 
			
 
				-			if (dev->towrite) {
			
 
				+			if (dev->towrite && !delay_towrite(dev, s)) {
			
 
				 				set_bit(R5_LOCKED, &dev->flags);
			
 
				 				set_bit(R5_Wantdrain, &dev->flags);
			
 
				 				if (!expand)
			
@@ -3494,10 +3518,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
 
				 	 * midst of changing due to a write
			
 
				 	 */
			
 
				 	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
			
 
				-	    !sh->reconstruct_state)
			
 
				+	    !sh->reconstruct_state) {
			
 
				+
			
 
				+		/*
			
 
				+		 * For degraded stripe with data in journal, do not handle
			
 
				+		 * read requests yet, instead, flush the stripe to raid
			
 
				+		 * disks first, this avoids handling complex rmw of write
			
 
				+		 * back cache (prexor with orig_page, and then xor with
			
 
				+		 * page) in the read path
			
 
				+		 */
			
 
				+		if (s->injournal && s->failed) {
			
 
				+			if (test_bit(STRIPE_R5C_CACHING, &sh->state))
			
 
				+				r5c_make_stripe_write_out(sh);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				 		for (i = disks; i--; )
			
 
				 			if (fetch_block(sh, s, i, disks))
			
 
				 				break;
			
 
				+	}
			
 
				+out:
			
 
				 	set_bit(STRIPE_HANDLE, &sh->state);
			
 
				 }
			
 
				 
			
@@ -3653,7 +3693,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 
				 	} else for (i = disks; i--; ) {
			
 
				 		/* would I have to read this buffer for read_modify_write */
			
 
				 		struct r5dev *dev = &sh->dev[i];
			
 
				-		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
			
 
				+		if (((dev->towrite && !delay_towrite(dev, s)) ||
			
 
				+		     i == sh->pd_idx || i == sh->qd_idx ||
			
 
				 		     test_bit(R5_InJournal, &dev->flags)) &&
			
 
				 		    !test_bit(R5_LOCKED, &dev->flags) &&
			
 
				 		    !(uptodate_for_rmw(dev) ||
			
@@ -3717,7 +3758,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 
				 
			
 
				 		for (i = disks; i--; ) {
			
 
				 			struct r5dev *dev = &sh->dev[i];
			
 
				-			if ((dev->towrite ||
			
 
				+			if (((dev->towrite && !delay_towrite(dev, s)) ||
			
 
				 			     i == sh->pd_idx || i == sh->qd_idx ||
			
 
				 			     test_bit(R5_InJournal, &dev->flags)) &&
			
 
				 			    !test_bit(R5_LOCKED, &dev->flags) &&