|
@@ -2897,6 +2897,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
|
|
|
return r_sector;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * There are cases where we want handle_stripe_dirtying() and
|
|
|
+ * schedule_reconstruction() to delay towrite to some dev of a stripe.
|
|
|
+ *
|
|
|
+ * This function checks whether we want to delay the towrite. Specifically,
|
|
|
+ * we delay the towrite when:
|
|
|
+ *
|
|
|
+ * 1. degraded stripe has a non-overwrite to the missing dev, AND this
|
|
|
+ * stripe has data in journal (for other devices).
|
|
|
+ *
|
|
|
+ * In this case, when reading data for the non-overwrite dev, it is
|
|
|
+ * necessary to handle complex rmw of write back cache (prexor with
|
|
|
+ * orig_page, and xor with page). To keep read path simple, we would
|
|
|
+ * like to flush data in journal to RAID disks first, so complex rmw
|
|
|
+ * is handled in the write path (handle_stripe_dirtying).
|
|
|
+ *
|
|
|
+ */
|
|
|
+static inline bool delay_towrite(struct r5dev *dev,
|
|
|
+ struct stripe_head_state *s)
|
|
|
+{ /* returns true only when all three conditions below hold */
|
|
|
+ return !test_bit(R5_OVERWRITE, &dev->flags) && /* R5_OVERWRITE clear: the "non-overwrite" case */
|
|
|
+ !test_bit(R5_Insync, &dev->flags) && s->injournal; /* R5_Insync clear on this dev, and s->injournal is non-zero */
|
|
|
+}
|
|
|
+
|
|
|
static void
|
|
|
schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
|
|
|
int rcw, int expand)
|
|
@@ -2917,7 +2941,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
|
|
|
for (i = disks; i--; ) {
|
|
|
struct r5dev *dev = &sh->dev[i];
|
|
|
|
|
|
- if (dev->towrite) {
|
|
|
+ if (dev->towrite && !delay_towrite(dev, s)) {
|
|
|
set_bit(R5_LOCKED, &dev->flags);
|
|
|
set_bit(R5_Wantdrain, &dev->flags);
|
|
|
if (!expand)
|
|
@@ -3494,10 +3518,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
|
|
|
* midst of changing due to a write
|
|
|
*/
|
|
|
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
|
|
|
- !sh->reconstruct_state)
|
|
|
+ !sh->reconstruct_state) {
|
|
|
+
|
|
|
+ /*
|
|
|
+ * For degraded stripe with data in journal, do not handle
|
|
|
+ * read requests yet, instead, flush the stripe to raid
|
|
|
+ * disks first, this avoids handling complex rmw of write
|
|
|
+ * back cache (prexor with orig_page, and then xor with
|
|
|
+ * page) in the read path
|
|
|
+ */
|
|
|
+ if (s->injournal && s->failed) {
|
|
|
+ if (test_bit(STRIPE_R5C_CACHING, &sh->state))
|
|
|
+ r5c_make_stripe_write_out(sh);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
for (i = disks; i--; )
|
|
|
if (fetch_block(sh, s, i, disks))
|
|
|
break;
|
|
|
+ }
|
|
|
+out:
|
|
|
set_bit(STRIPE_HANDLE, &sh->state);
|
|
|
}
|
|
|
|
|
@@ -3653,7 +3693,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
|
|
|
} else for (i = disks; i--; ) {
|
|
|
/* would I have to read this buffer for read_modify_write */
|
|
|
struct r5dev *dev = &sh->dev[i];
|
|
|
- if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
|
|
|
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
|
|
|
+ i == sh->pd_idx || i == sh->qd_idx ||
|
|
|
test_bit(R5_InJournal, &dev->flags)) &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
!(uptodate_for_rmw(dev) ||
|
|
@@ -3717,7 +3758,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
|
|
|
|
|
|
for (i = disks; i--; ) {
|
|
|
struct r5dev *dev = &sh->dev[i];
|
|
|
- if ((dev->towrite ||
|
|
|
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
|
|
|
i == sh->pd_idx || i == sh->qd_idx ||
|
|
|
test_bit(R5_InJournal, &dev->flags)) &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|