|
@@ -40,6 +40,47 @@
|
|
|
*/
|
|
|
#define R5L_POOL_SIZE 4
|
|
|
|
|
|
+/*
|
|
|
+ * r5c journal modes of the array: write-back or write-through.
|
|
|
+ * write-through mode has identical behavior as existing log only
|
|
|
+ * implementation.
|
|
|
+ */
|
|
|
+enum r5c_journal_mode {
|
|
|
+ R5C_JOURNAL_MODE_WRITE_THROUGH = 0,
|
|
|
+ R5C_JOURNAL_MODE_WRITE_BACK = 1,
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * raid5 cache state machine
|
|
|
+ *
|
|
|
+ * With rhe RAID cache, each stripe works in two phases:
|
|
|
+ * - caching phase
|
|
|
+ * - writing-out phase
|
|
|
+ *
|
|
|
+ * These two phases are controlled by bit STRIPE_R5C_CACHING:
|
|
|
+ * if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase
|
|
|
+ * if STRIPE_R5C_CACHING == 1, the stripe is in caching phase
|
|
|
+ *
|
|
|
+ * When there is no journal, or the journal is in write-through mode,
|
|
|
+ * the stripe is always in writing-out phase.
|
|
|
+ *
|
|
|
+ * For write-back journal, the stripe is sent to caching phase on write
|
|
|
+ * (r5c_try_caching_write). r5c_make_stripe_write_out() kicks off
|
|
|
+ * the write-out phase by clearing STRIPE_R5C_CACHING.
|
|
|
+ *
|
|
|
+ * Stripes in caching phase do not write the raid disks. Instead, all
|
|
|
+ * writes are committed from the log device. Therefore, a stripe in
|
|
|
+ * caching phase handles writes as:
|
|
|
+ * - write to log device
|
|
|
+ * - return IO
|
|
|
+ *
|
|
|
+ * Stripes in writing-out phase handle writes as:
|
|
|
+ * - calculate parity
|
|
|
+ * - write pending data and parity to journal
|
|
|
+ * - write data and parity to raid disks
|
|
|
+ * - return IO for pending writes
|
|
|
+ */
|
|
|
+
|
|
|
struct r5l_log {
|
|
|
struct md_rdev *rdev;
|
|
|
|
|
@@ -96,6 +137,9 @@ struct r5l_log {
|
|
|
spinlock_t no_space_stripes_lock;
|
|
|
|
|
|
bool need_cache_flush;
|
|
|
+
|
|
|
+ /* for r5c_cache */
|
|
|
+ enum r5c_journal_mode r5c_journal_mode;
|
|
|
};
|
|
|
|
|
|
/*
|
|
@@ -133,6 +177,12 @@ enum r5l_io_unit_state {
|
|
|
IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */
|
|
|
};
|
|
|
|
|
|
+bool r5c_is_writeback(struct r5l_log *log)
|
|
|
+{
|
|
|
+ return (log != NULL &&
|
|
|
+ log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK);
|
|
|
+}
|
|
|
+
|
|
|
static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc)
|
|
|
{
|
|
|
start += inc;
|
|
@@ -168,12 +218,51 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
|
|
|
io->state = state;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING.
|
|
|
+ * This function should only be called in write-back mode.
|
|
|
+ */
|
|
|
+static void r5c_make_stripe_write_out(struct stripe_head *sh)
|
|
|
+{
|
|
|
+ struct r5conf *conf = sh->raid_conf;
|
|
|
+ struct r5l_log *log = conf->log;
|
|
|
+
|
|
|
+ BUG_ON(!r5c_is_writeback(log));
|
|
|
+
|
|
|
+ WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state));
|
|
|
+ clear_bit(STRIPE_R5C_CACHING, &sh->state);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Setting proper flags after writing (or flushing) data and/or parity to the
|
|
|
+ * log device. This is called from r5l_log_endio() or r5l_log_flush_endio().
|
|
|
+ */
|
|
|
+static void r5c_finish_cache_stripe(struct stripe_head *sh)
|
|
|
+{
|
|
|
+ struct r5l_log *log = sh->raid_conf->log;
|
|
|
+
|
|
|
+ if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
|
|
|
+ BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
|
|
|
+ /*
|
|
|
+ * Set R5_InJournal for parity dev[pd_idx]. This means
|
|
|
+ * all data AND parity in the journal. For RAID 6, it is
|
|
|
+ * NOT necessary to set the flag for dev[qd_idx], as the
|
|
|
+ * two parities are written out together.
|
|
|
+ */
|
|
|
+ set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
|
|
|
+ } else
|
|
|
+ BUG(); /* write-back logic in next patch */
|
|
|
+}
|
|
|
+
|
|
|
static void r5l_io_run_stripes(struct r5l_io_unit *io)
|
|
|
{
|
|
|
struct stripe_head *sh, *next;
|
|
|
|
|
|
list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
|
|
|
list_del_init(&sh->log_list);
|
|
|
+
|
|
|
+ r5c_finish_cache_stripe(sh);
|
|
|
+
|
|
|
set_bit(STRIPE_HANDLE, &sh->state);
|
|
|
raid5_release_stripe(sh);
|
|
|
}
|
|
@@ -412,18 +501,19 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh,
|
|
|
r5l_append_payload_page(log, sh->dev[i].page);
|
|
|
}
|
|
|
|
|
|
- if (sh->qd_idx >= 0) {
|
|
|
+ if (parity_pages == 2) {
|
|
|
r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
|
|
|
sh->sector, sh->dev[sh->pd_idx].log_checksum,
|
|
|
sh->dev[sh->qd_idx].log_checksum, true);
|
|
|
r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
|
|
|
r5l_append_payload_page(log, sh->dev[sh->qd_idx].page);
|
|
|
- } else {
|
|
|
+ } else if (parity_pages == 1) {
|
|
|
r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY,
|
|
|
sh->sector, sh->dev[sh->pd_idx].log_checksum,
|
|
|
0, false);
|
|
|
r5l_append_payload_page(log, sh->dev[sh->pd_idx].page);
|
|
|
- }
|
|
|
+ } else /* Just writing data, not parity, in caching phase */
|
|
|
+ BUG_ON(parity_pages != 0);
|
|
|
|
|
|
list_add_tail(&sh->log_list, &io->stripe_list);
|
|
|
atomic_inc(&io->pending_stripe);
|
|
@@ -455,6 +545,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
|
|
|
+ WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
|
|
|
+
|
|
|
for (i = 0; i < sh->disks; i++) {
|
|
|
void *addr;
|
|
|
|
|
@@ -1112,6 +1204,49 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp)
|
|
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Try handle write operation in caching phase. This function should only
|
|
|
+ * be called in write-back mode.
|
|
|
+ *
|
|
|
+ * If all outstanding writes can be handled in caching phase, returns 0
|
|
|
+ * If writes requires write-out phase, call r5c_make_stripe_write_out()
|
|
|
+ * and returns -EAGAIN
|
|
|
+ */
|
|
|
+int r5c_try_caching_write(struct r5conf *conf,
|
|
|
+ struct stripe_head *sh,
|
|
|
+ struct stripe_head_state *s,
|
|
|
+ int disks)
|
|
|
+{
|
|
|
+ struct r5l_log *log = conf->log;
|
|
|
+
|
|
|
+ BUG_ON(!r5c_is_writeback(log));
|
|
|
+
|
|
|
+ /* more write-back logic in next patches */
|
|
|
+ r5c_make_stripe_write_out(sh);
|
|
|
+ return -EAGAIN;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * clean up the stripe (clear R5_InJournal for dev[pd_idx] etc.) after the
|
|
|
+ * stripe is committed to RAID disks.
|
|
|
+ */
|
|
|
+void r5c_finish_stripe_write_out(struct r5conf *conf,
|
|
|
+ struct stripe_head *sh,
|
|
|
+ struct stripe_head_state *s)
|
|
|
+{
|
|
|
+ if (!conf->log ||
|
|
|
+ !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
|
|
|
+ return;
|
|
|
+
|
|
|
+ WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
|
|
|
+ clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags);
|
|
|
+
|
|
|
+ if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
|
|
|
+ return;
|
|
|
+ BUG(); /* write-back logic in following patches */
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
static int r5l_load_log(struct r5l_log *log)
|
|
|
{
|
|
|
struct md_rdev *rdev = log->rdev;
|
|
@@ -1249,6 +1384,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
|
|
INIT_LIST_HEAD(&log->no_space_stripes);
|
|
|
spin_lock_init(&log->no_space_stripes_lock);
|
|
|
|
|
|
+ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
|
|
|
+
|
|
|
if (r5l_load_log(log))
|
|
|
goto error;
|
|
|
|