|
@@ -1,6 +1,6 @@
|
|
|
/*
|
|
|
* Copyright (C) 2010-2011 Neil Brown
|
|
|
- * Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved.
|
|
|
+ * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
|
|
|
*
|
|
|
* This file is released under the GPL.
|
|
|
*/
|
|
@@ -29,9 +29,6 @@
|
|
|
*/
|
|
|
#define MIN_RAID456_JOURNAL_SPACE (4*2048)
|
|
|
|
|
|
-/* Global list of all raid sets */
|
|
|
-static LIST_HEAD(raid_sets);
|
|
|
-
|
|
|
static bool devices_handle_discard_safely = false;
|
|
|
|
|
|
/*
|
|
@@ -227,7 +224,6 @@ struct rs_layout {
|
|
|
|
|
|
struct raid_set {
|
|
|
struct dm_target *ti;
|
|
|
- struct list_head list;
|
|
|
|
|
|
uint32_t stripe_cache_entries;
|
|
|
unsigned long ctr_flags;
|
|
@@ -273,19 +269,6 @@ static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
|
|
|
mddev->new_chunk_sectors = l->new_chunk_sectors;
|
|
|
}
|
|
|
|
|
|
-/* Find any raid_set in active slot for @rs on global list */
|
|
|
-static struct raid_set *rs_find_active(struct raid_set *rs)
|
|
|
-{
|
|
|
- struct raid_set *r;
|
|
|
- struct mapped_device *md = dm_table_get_md(rs->ti->table);
|
|
|
-
|
|
|
- list_for_each_entry(r, &raid_sets, list)
|
|
|
- if (r != rs && dm_table_get_md(r->ti->table) == md)
|
|
|
- return r;
|
|
|
-
|
|
|
- return NULL;
|
|
|
-}
|
|
|
-
|
|
|
/* raid10 algorithms (i.e. formats) */
|
|
|
#define ALGORITHM_RAID10_DEFAULT 0
|
|
|
#define ALGORITHM_RAID10_NEAR 1
|
|
@@ -764,7 +747,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|
|
|
|
|
mddev_init(&rs->md);
|
|
|
|
|
|
- INIT_LIST_HEAD(&rs->list);
|
|
|
rs->raid_disks = raid_devs;
|
|
|
rs->delta_disks = 0;
|
|
|
|
|
@@ -782,9 +764,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|
|
for (i = 0; i < raid_devs; i++)
|
|
|
md_rdev_init(&rs->dev[i].rdev);
|
|
|
|
|
|
- /* Add @rs to global list. */
|
|
|
- list_add(&rs->list, &raid_sets);
|
|
|
-
|
|
|
/*
|
|
|
* Remaining items to be initialized by further RAID params:
|
|
|
* rs->md.persistent
|
|
@@ -797,7 +776,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|
|
return rs;
|
|
|
}
|
|
|
|
|
|
-/* Free all @rs allocations and remove it from global list. */
|
|
|
+/* Free all @rs allocations */
|
|
|
static void raid_set_free(struct raid_set *rs)
|
|
|
{
|
|
|
int i;
|
|
@@ -815,8 +794,6 @@ static void raid_set_free(struct raid_set *rs)
|
|
|
dm_put_device(rs->ti, rs->dev[i].data_dev);
|
|
|
}
|
|
|
|
|
|
- list_del(&rs->list);
|
|
|
-
|
|
|
kfree(rs);
|
|
|
}
|
|
|
|
|
@@ -2649,7 +2626,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- /* HM FIXME: get InSync raid_dev? */
|
|
|
+ /* HM FIXME: get In_Sync raid_dev? */
|
|
|
rdev = &rs->dev[0].rdev;
|
|
|
|
|
|
if (rs->delta_disks < 0) {
|
|
@@ -3149,6 +3126,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|
|
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
|
|
|
rs_set_new(rs);
|
|
|
} else if (rs_is_recovering(rs)) {
|
|
|
+ /* Rebuild particular devices */
|
|
|
+ if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
|
|
|
+ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
|
|
|
+ rs_setup_recovery(rs, MaxSector);
|
|
|
+ }
|
|
|
/* A recovering raid set may be resized */
|
|
|
; /* skip setup rs */
|
|
|
} else if (rs_is_reshaping(rs)) {
|
|
@@ -3242,6 +3224,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|
|
/* Start raid set read-only and assumed clean to change in raid_resume() */
|
|
|
rs->md.ro = 1;
|
|
|
rs->md.in_sync = 1;
|
|
|
+
|
|
|
+ /* Keep array frozen */
|
|
|
set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
|
|
|
|
|
|
/* Has to be held on running the array */
|
|
@@ -3265,7 +3249,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|
|
rs->callbacks.congested_fn = raid_is_congested;
|
|
|
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
|
|
|
|
|
|
- /* If raid4/5/6 journal mode explictely requested (only possible with journal dev) -> set it */
|
|
|
+ /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
|
|
|
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
|
|
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
|
|
|
if (r) {
|
|
@@ -3350,32 +3334,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
|
|
return DM_MAPIO_SUBMITTED;
|
|
|
}
|
|
|
|
|
|
-/* Return string describing the current sync action of @mddev */
|
|
|
-static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery)
|
|
|
+/* Return sync state string for @state */
|
|
|
+enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
|
|
|
+static const char *sync_str(enum sync_state state)
|
|
|
+{
|
|
|
+ /* Has to be in above sync_state order! */
|
|
|
+ static const char *sync_strs[] = {
|
|
|
+ "frozen",
|
|
|
+ "reshape",
|
|
|
+ "resync",
|
|
|
+ "check",
|
|
|
+ "repair",
|
|
|
+ "recover",
|
|
|
+ "idle"
|
|
|
+ };
|
|
|
+
|
|
|
+ return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
|
|
|
+}
|
|
|
+
|
|
|
+/* Return enum sync_state for @mddev derived from @recovery flags */
|
|
|
+static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
|
|
|
{
|
|
|
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
|
|
|
- return "frozen";
|
|
|
+ return st_frozen;
|
|
|
|
|
|
- /* The MD sync thread can be done with io but still be running */
|
|
|
+ /* The MD sync thread can be done with io or be interrupted but still be running */
|
|
|
if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
|
|
|
(test_bit(MD_RECOVERY_RUNNING, &recovery) ||
|
|
|
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
|
|
|
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
|
|
|
- return "reshape";
|
|
|
+ return st_reshape;
|
|
|
|
|
|
if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
|
|
|
if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
|
|
|
- return "resync";
|
|
|
- else if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
|
|
- return "check";
|
|
|
- return "repair";
|
|
|
+ return st_resync;
|
|
|
+ if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
|
|
+ return st_check;
|
|
|
+ return st_repair;
|
|
|
}
|
|
|
|
|
|
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
|
|
- return "recover";
|
|
|
+ return st_recover;
|
|
|
+
|
|
|
+ if (mddev->reshape_position != MaxSector)
|
|
|
+ return st_reshape;
|
|
|
}
|
|
|
|
|
|
- return "idle";
|
|
|
+ return st_idle;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -3409,6 +3414,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|
|
sector_t resync_max_sectors)
|
|
|
{
|
|
|
sector_t r;
|
|
|
+ enum sync_state state;
|
|
|
struct mddev *mddev = &rs->md;
|
|
|
|
|
|
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
|
@@ -3419,20 +3425,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|
|
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
|
|
|
|
|
} else {
|
|
|
- if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) &&
|
|
|
- !test_bit(MD_RECOVERY_INTR, &recovery) &&
|
|
|
- (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
|
|
|
- test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
|
|
|
- test_bit(MD_RECOVERY_RUNNING, &recovery)))
|
|
|
- r = mddev->curr_resync_completed;
|
|
|
- else
|
|
|
+ state = decipher_sync_action(mddev, recovery);
|
|
|
+
|
|
|
+ if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
|
|
|
r = mddev->recovery_cp;
|
|
|
+ else
|
|
|
+ r = mddev->curr_resync_completed;
|
|
|
|
|
|
- if (r >= resync_max_sectors &&
|
|
|
- (!test_bit(MD_RECOVERY_REQUESTED, &recovery) ||
|
|
|
- (!test_bit(MD_RECOVERY_FROZEN, &recovery) &&
|
|
|
- !test_bit(MD_RECOVERY_NEEDED, &recovery) &&
|
|
|
- !test_bit(MD_RECOVERY_RUNNING, &recovery)))) {
|
|
|
+ if (state == st_idle && r >= resync_max_sectors) {
|
|
|
/*
|
|
|
* Sync complete.
|
|
|
*/
|
|
@@ -3440,24 +3440,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|
|
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
|
|
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
|
|
|
|
|
- } else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) {
|
|
|
+ } else if (state == st_recover)
|
|
|
/*
|
|
|
* In case we are recovering, the array is not in sync
|
|
|
* and health chars should show the recovering legs.
|
|
|
*/
|
|
|
;
|
|
|
-
|
|
|
- } else if (test_bit(MD_RECOVERY_SYNC, &recovery) &&
|
|
|
- !test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
|
|
+ else if (state == st_resync)
|
|
|
/*
|
|
|
* If "resync" is occurring, the raid set
|
|
|
* is or may be out of sync hence the health
|
|
|
* characters shall be 'a'.
|
|
|
*/
|
|
|
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
|
|
-
|
|
|
- } else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) &&
|
|
|
- !test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
|
|
+ else if (state == st_reshape)
|
|
|
/*
|
|
|
* If "reshape" is occurring, the raid set
|
|
|
* is or may be out of sync hence the health
|
|
@@ -3465,7 +3461,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|
|
*/
|
|
|
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
|
|
|
|
|
- } else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
|
|
+ else if (state == st_check || state == st_repair)
|
|
|
/*
|
|
|
* If "check" or "repair" is occurring, the raid set has
|
|
|
* undergone an initial sync and the health characters
|
|
@@ -3473,12 +3469,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
|
|
*/
|
|
|
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
|
|
|
|
|
- } else {
|
|
|
+ else {
|
|
|
struct md_rdev *rdev;
|
|
|
|
|
|
/*
|
|
|
* We are idle and recovery is needed, prevent 'A' chars race
|
|
|
- * caused by components still set to in-sync by constrcuctor.
|
|
|
+ * caused by components still set to in-sync by constructor.
|
|
|
*/
|
|
|
if (test_bit(MD_RECOVERY_NEEDED, &recovery))
|
|
|
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
|
@@ -3542,7 +3538,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|
|
progress = rs_get_progress(rs, recovery, resync_max_sectors);
|
|
|
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
|
|
|
atomic64_read(&mddev->resync_mismatches) : 0;
|
|
|
- sync_action = decipher_sync_action(&rs->md, recovery);
|
|
|
+ sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
|
|
|
|
|
|
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
|
|
for (i = 0; i < rs->raid_disks; i++)
|
|
@@ -3892,14 +3888,13 @@ static int rs_start_reshape(struct raid_set *rs)
|
|
|
struct mddev *mddev = &rs->md;
|
|
|
struct md_personality *pers = mddev->pers;
|
|
|
|
|
|
+ /* Don't allow the sync thread to work until the table gets reloaded. */
|
|
|
+ set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
|
|
|
+
|
|
|
r = rs_setup_reshape(rs);
|
|
|
if (r)
|
|
|
return r;
|
|
|
|
|
|
- /* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */
|
|
|
- if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
|
|
|
- mddev_resume(mddev);
|
|
|
-
|
|
|
/*
|
|
|
* Check any reshape constraints enforced by the personalility
|
|
|
*
|
|
@@ -3923,10 +3918,6 @@ static int rs_start_reshape(struct raid_set *rs)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /* Suspend because a resume will happen in raid_resume() */
|
|
|
- set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
|
|
|
- mddev_suspend(mddev);
|
|
|
-
|
|
|
/*
|
|
|
* Now reshape got set up, update superblocks to
|
|
|
* reflect the fact so that a table reload will
|
|
@@ -3947,29 +3938,6 @@ static int raid_preresume(struct dm_target *ti)
|
|
|
if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
|
|
|
return 0;
|
|
|
|
|
|
- if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
|
|
|
- struct raid_set *rs_active = rs_find_active(rs);
|
|
|
-
|
|
|
- if (rs_active) {
|
|
|
- /*
|
|
|
- * In case no rebuilds have been requested
|
|
|
- * and an active table slot exists, copy
|
|
|
- * current resynchonization completed and
|
|
|
- * reshape position pointers across from
|
|
|
- * suspended raid set in the active slot.
|
|
|
- *
|
|
|
- * This resumes the new mapping at current
|
|
|
- * offsets to continue recover/reshape without
|
|
|
- * necessarily redoing a raid set partially or
|
|
|
- * causing data corruption in case of a reshape.
|
|
|
- */
|
|
|
- if (rs_active->md.curr_resync_completed != MaxSector)
|
|
|
- mddev->curr_resync_completed = rs_active->md.curr_resync_completed;
|
|
|
- if (rs_active->md.reshape_position != MaxSector)
|
|
|
- mddev->reshape_position = rs_active->md.reshape_position;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
/*
|
|
|
* The superblocks need to be updated on disk if the
|
|
|
* array is new or new devices got added (thus zeroed
|
|
@@ -4046,7 +4014,7 @@ static void raid_resume(struct dm_target *ti)
|
|
|
|
|
|
static struct target_type raid_target = {
|
|
|
.name = "raid",
|
|
|
- .version = {1, 13, 2},
|
|
|
+ .version = {1, 14, 0},
|
|
|
.module = THIS_MODULE,
|
|
|
.ctr = raid_ctr,
|
|
|
.dtr = raid_dtr,
|