@@ -1317,7 +1317,9 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
  * destination buffer is recorded in srcs[count] and the Q destination
  * is recorded in srcs[count+1].
  */
-static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+static int set_syndrome_sources(struct page **srcs,
+				struct stripe_head *sh,
+				int srctype)
 {
 	int disks = sh->disks;
 	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
@@ -1332,8 +1334,15 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
 	i = d0_idx;
 	do {
 		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+		struct r5dev *dev = &sh->dev[i];
 
-		srcs[slot] = sh->dev[i].page;
+		if (i == sh->qd_idx || i == sh->pd_idx ||
+		    (srctype == SYNDROME_SRC_ALL) ||
+		    (srctype == SYNDROME_SRC_WANT_DRAIN &&
+		     test_bit(R5_Wantdrain, &dev->flags)) ||
+		    (srctype == SYNDROME_SRC_WRITTEN &&
+		     dev->written))
+			srcs[slot] = sh->dev[i].page;
 		i = raid6_next_disk(i, disks);
 	} while (i != d0_idx);
 
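
The srctype selector decides which data pages feed async_gen_syndrome(): every block, only blocks queued for drain (the subtract pass of a read-modify-write), or only blocks that have just been written (the add-back pass). P and Q themselves are always included. The three constants come from a companion raid5.h hunk that is not part of this excerpt; a minimal sketch of what it presumably declares:

	/* Sketch only - assumed shape of the raid5.h companion change. */
	enum {
		SYNDROME_SRC_ALL,		/* feed every data block */
		SYNDROME_SRC_WANT_DRAIN,	/* only R5_Wantdrain blocks */
		SYNDROME_SRC_WRITTEN,		/* only blocks with written data */
	};
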
@@ -1373,7 +1382,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 	atomic_inc(&sh->count);
 
 	if (target == qd_idx) {
-		count = set_syndrome_sources(blocks, sh);
+		count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
 		blocks[count] = NULL; /* regenerating p is not necessary */
 		BUG_ON(blocks[count+1] != dest); /* q should already be set */
 		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
@@ -1481,7 +1490,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 			tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
 				       &submit);
 
-			count = set_syndrome_sources(blocks, sh);
+			count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
 			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
 					  ops_complete_compute, sh,
 					  to_addr_conv(sh, percpu, 0));
@@ -1515,8 +1524,8 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
-	       struct dma_async_tx_descriptor *tx)
+ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
+		struct dma_async_tx_descriptor *tx)
 {
 	int disks = sh->disks;
 	struct page **xor_srcs = to_addr_page(percpu, 0);
@@ -1544,6 +1553,26 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
 	return tx;
 }
 
+static struct dma_async_tx_descriptor *
+ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
+		struct dma_async_tx_descriptor *tx)
+{
+	struct page **blocks = to_addr_page(percpu, 0);
+	int count;
+	struct async_submit_ctl submit;
+
+	pr_debug("%s: stripe %llu\n", __func__,
+		 (unsigned long long)sh->sector);
+
+	count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN);
+
+	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
+			  ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
+	tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+
+	return tx;
+}
+
 static struct dma_async_tx_descriptor *
 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 {
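
ops_run_prexor6() is the RAID6 counterpart of the RAID5 pre-XOR pass: before new data is drained into the stripe, the old contents of the to-be-overwritten blocks are XOR-subtracted out of both P and Q. Over GF(2^8) subtraction is XOR, but each block's contribution to Q is weighted by a generator power, so the Q update also needs a Galois-field multiply. ASYNC_TX_PQ_XOR_DST presumably tells async_gen_syndrome() to XOR into the existing P/Q destinations instead of overwriting them; the flag comes from a companion async_tx change outside this excerpt. A byte-at-a-time sketch of the delta step (u8 is from linux/types.h; gf_mul() and g_pow[] are hypothetical helpers, not the kernel's implementation):

	/* Sketch: remove (or re-add) block 'slot' from P/Q one byte at a
	 * time. The real work happens SIMD-wide in lib/raid6 via the new
	 * xor_syndrome() method that setup_conf() probes below.
	 */
	static void pq_delta(u8 *p, u8 *q, const u8 *data, int slot, size_t len)
	{
		size_t i;

		for (i = 0; i < len; i++) {
			p[i] ^= data[i];				/* P ^= D */
			q[i] ^= gf_mul(g_pow[slot], data[i]);	/* Q ^= g^slot * D */
		}
	}
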
@@ -1746,6 +1775,8 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 	int count, i, j = 0;
 	struct stripe_head *head_sh = sh;
 	int last_stripe;
+	int synflags;
+	unsigned long txflags;
 
 	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
@@ -1765,14 +1796,23 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 
 again:
 	blocks = to_addr_page(percpu, j);
-	count = set_syndrome_sources(blocks, sh);
+
+	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+		synflags = SYNDROME_SRC_WRITTEN;
+		txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
+	} else {
+		synflags = SYNDROME_SRC_ALL;
+		txflags = ASYNC_TX_ACK;
+	}
+
+	count = set_syndrome_sources(blocks, sh, synflags);
 	last_stripe = !head_sh->batch_head ||
 		list_first_entry(&sh->batch_list,
 				 struct stripe_head, batch_list) == head_sh;
 
 	if (last_stripe) {
 		atomic_inc(&head_sh->count);
-		init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+		init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
 				  head_sh, to_addr_conv(sh, percpu, j));
 	} else
 		init_async_submit(&submit, 0, tx, NULL, NULL,
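
Taken together with the prexor pass, this completes the RAID6 read-modify-write: ops_run_prexor6() removes the old data from P/Q, ops_run_biodrain() copies the new data into the stripe, and this final gen_syndrome, fed only the written blocks (SYNDROME_SRC_WRITTEN) and XORing onto the partially updated parity (ASYNC_TX_PQ_XOR_DST), adds the new contributions back in. For a block in slot i the net effect is the standard delta update:

	P_new = P_old ^ D_old ^ D_new
	Q_new = Q_old ^ (g^i * D_old) ^ (g^i * D_new)	/* GF(2^8) multiply */
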
@@ -1843,7 +1883,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
 		(unsigned long long)sh->sector, checkp);
 
 	BUG_ON(sh->batch_head);
-	count = set_syndrome_sources(srcs, sh);
+	count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL);
 	if (!checkp)
 		srcs[count] = NULL;
 
@@ -1884,8 +1924,12 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 			async_tx_ack(tx);
 	}
 
-	if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-		tx = ops_run_prexor(sh, percpu, tx);
+	if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
+		if (level < 6)
+			tx = ops_run_prexor5(sh, percpu, tx);
+		else
+			tx = ops_run_prexor6(sh, percpu, tx);
+	}
 
 	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
 		tx = ops_run_biodrain(sh, tx);
@@ -2770,7 +2814,7 @@ static void
 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 			 int rcw, int expand)
 {
-	int i, pd_idx = sh->pd_idx, disks = sh->disks;
+	int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
 	struct r5conf *conf = sh->raid_conf;
 	int level = conf->level;
@@ -2806,13 +2850,15 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 			atomic_inc(&conf->pending_full_writes);
 	} else {
-		BUG_ON(level == 6);
 		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
 			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+		BUG_ON(level == 6 &&
+		       (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
+			  test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
 
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
-			if (i == pd_idx)
+			if (i == pd_idx || i == qd_idx)
 				continue;
 
 			if (dev->towrite &&
@@ -3476,28 +3522,27 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	int rmw = 0, rcw = 0, i;
 	sector_t recovery_cp = conf->mddev->recovery_cp;
 
-	/* RAID6 requires 'rcw' in current implementation.
-	 * Otherwise, check whether resync is now happening or should start.
+	/* Check whether resync is now happening or should start.
 	 * If yes, then the array is dirty (after unclean shutdown or
 	 * initial creation), so parity in some stripes might be inconsistent.
 	 * In this case, we need to always do reconstruct-write, to ensure
 	 * that in case of drive failure or read-error correction, we
 	 * generate correct data from the parity.
 	 */
-	if (conf->max_degraded == 2 ||
+	if (conf->rmw_level == PARITY_DISABLE_RMW ||
 	    (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
 	     s->failed == 0)) {
 		/* Calculate the real rcw later - for now make it
 		 * look like rcw is cheaper
 		 */
 		rcw = 1; rmw = 2;
-		pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
-			 conf->max_degraded, (unsigned long long)recovery_cp,
+		pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
+			 conf->rmw_level, (unsigned long long)recovery_cp,
 			 (unsigned long long)sh->sector);
 	} else for (i = disks; i--; ) {
 		/* would I have to read this buffer for read_modify_write */
 		struct r5dev *dev = &sh->dev[i];
-		if ((dev->towrite || i == sh->pd_idx) &&
+		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
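
The rmw and rcw counters tally how many blocks each strategy would have to read; the added qd_idx tests fold Q into that accounting for RAID6. As a worked example (hypothetical layout): on an 8-drive RAID6, 6 data blocks per stripe plus P and Q, with exactly one data block being overwritten and nothing cached:

	rmw: old data block + P + Q		-> 3 reads
	rcw: the 5 untouched data blocks	-> 5 reads

so with rmw_level == PARITY_ENABLE_RMW the read-modify-write branch below wins. As more of the stripe is overwritten, rcw shrinks (overwritten blocks need no read) while rmw grows, and reconstruct-write takes over.
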
@@ -3507,7 +3552,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			rmw += 2*disks; /* cannot read it */
 		}
 		/* Would I have to read this buffer for reconstruct_write */
-		if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+		if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+		    i != sh->pd_idx && i != sh->qd_idx &&
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3520,7 +3566,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	pr_debug("for sector %llu, rmw=%d rcw=%d\n",
 		 (unsigned long long)sh->sector, rmw, rcw);
 	set_bit(STRIPE_HANDLE, &sh->state);
-	if (rmw < rcw && rmw > 0) {
+	if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
 		if (conf->mddev->queue)
 			blk_add_trace_msg(conf->mddev->queue,
@@ -3528,7 +3574,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 				  (unsigned long long)sh->sector, rmw);
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
-			if ((dev->towrite || i == sh->pd_idx) &&
+			if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
 			    !test_bit(R5_LOCKED, &dev->flags) &&
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			      test_bit(R5_Wantcompute, &dev->flags)) &&
@@ -3547,7 +3593,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 		}
 	}
 	}
-	if (rcw <= rmw && rcw > 0) {
+	if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
 		/* want reconstruct write, but need to get some data */
 		int qread =0;
 		rcw = 0;
@@ -6344,10 +6390,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	}
 
 	conf->level = mddev->new_level;
-	if (conf->level == 6)
+	if (conf->level == 6) {
 		conf->max_degraded = 2;
-	else
+		if (raid6_call.xor_syndrome)
+			conf->rmw_level = PARITY_ENABLE_RMW;
+		else
+			conf->rmw_level = PARITY_DISABLE_RMW;
+	} else {
 		conf->max_degraded = 1;
+		conf->rmw_level = PARITY_ENABLE_RMW;
+	}
 	conf->algorithm = mddev->new_layout;
 	conf->reshape_progress = mddev->reshape_position;
 	if (conf->reshape_progress != MaxSector) {