|
@@ -69,7 +69,7 @@
|
|
|
|
|
|
#include <trace/events/block.h>
|
|
|
#include "md.h"
|
|
|
-#include "bitmap.h"
|
|
|
+#include "md-bitmap.h"
|
|
|
#include "md-cluster.h"
|
|
|
|
|
|
#ifndef MODULE
|
|
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
|
|
|
* call has finished, the bio has been linked into some internal structure
|
|
|
* and so is visible to ->quiesce(), so we don't need the refcount any more.
|
|
|
*/
|
|
|
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
|
|
|
+{
|
|
|
+ if (mddev->suspended)
|
|
|
+ return true;
|
|
|
+ if (bio_data_dir(bio) != WRITE)
|
|
|
+ return false;
|
|
|
+ if (mddev->suspend_lo >= mddev->suspend_hi)
|
|
|
+ return false;
|
|
|
+ if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
|
|
|
+ return false;
|
|
|
+ if (bio_end_sector(bio) < mddev->suspend_lo)
|
|
|
+ return false;
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
void md_handle_request(struct mddev *mddev, struct bio *bio)
|
|
|
{
|
|
|
check_suspended:
|
|
|
rcu_read_lock();
|
|
|
- if (mddev->suspended) {
|
|
|
+ if (is_suspended(mddev, bio)) {
|
|
|
DEFINE_WAIT(__wait);
|
|
|
for (;;) {
|
|
|
prepare_to_wait(&mddev->sb_wait, &__wait,
|
|
|
TASK_UNINTERRUPTIBLE);
|
|
|
- if (!mddev->suspended)
|
|
|
+ if (!is_suspended(mddev, bio))
|
|
|
break;
|
|
|
rcu_read_unlock();
|
|
|
schedule();
|
|
@@ -344,12 +359,17 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
|
|
|
void mddev_suspend(struct mddev *mddev)
|
|
|
{
|
|
|
WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
|
|
|
+ lockdep_assert_held(&mddev->reconfig_mutex);
|
|
|
if (mddev->suspended++)
|
|
|
return;
|
|
|
synchronize_rcu();
|
|
|
wake_up(&mddev->sb_wait);
|
|
|
+ set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
|
|
+ smp_mb__after_atomic();
|
|
|
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
|
|
|
mddev->pers->quiesce(mddev, 1);
|
|
|
+ clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
|
|
+ wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
|
|
|
|
|
|
del_timer_sync(&mddev->safemode_timer);
|
|
|
}
|
|
@@ -357,6 +377,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
|
|
|
|
|
|
void mddev_resume(struct mddev *mddev)
|
|
|
{
|
|
|
+ lockdep_assert_held(&mddev->reconfig_mutex);
|
|
|
if (--mddev->suspended)
|
|
|
return;
|
|
|
wake_up(&mddev->sb_wait);
|
|
@@ -663,6 +684,7 @@ void mddev_unlock(struct mddev *mddev)
|
|
|
*/
|
|
|
spin_lock(&pers_lock);
|
|
|
md_wakeup_thread(mddev->thread);
|
|
|
+ wake_up(&mddev->sb_wait);
|
|
|
spin_unlock(&pers_lock);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(mddev_unlock);
|
|
@@ -2313,7 +2335,7 @@ static void export_array(struct mddev *mddev)
|
|
|
|
|
|
static bool set_in_sync(struct mddev *mddev)
|
|
|
{
|
|
|
- WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
|
|
|
+ lockdep_assert_held(&mddev->lock);
|
|
|
if (!mddev->in_sync) {
|
|
|
mddev->sync_checkers++;
|
|
|
spin_unlock(&mddev->lock);
|
|
@@ -2432,10 +2454,18 @@ repeat:
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /* First make sure individual recovery_offsets are correct */
|
|
|
+ /*
|
|
|
+ * First make sure individual recovery_offsets are correct.
|
|
|
+ * curr_resync_completed can only be used during recovery.
|
|
|
+ * During reshape/resync it might use array-addresses rather
|
|
|
+ * than device addresses.
|
|
|
+ */
|
|
|
rdev_for_each(rdev, mddev) {
|
|
|
if (rdev->raid_disk >= 0 &&
|
|
|
mddev->delta_disks >= 0 &&
|
|
|
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
|
|
|
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
|
|
|
+ !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
|
|
!test_bit(Journal, &rdev->flags) &&
|
|
|
!test_bit(In_sync, &rdev->flags) &&
|
|
|
mddev->curr_resync_completed > rdev->recovery_offset)
|
|
@@ -4824,7 +4854,7 @@ suspend_lo_show(struct mddev *mddev, char *page)
|
|
|
static ssize_t
|
|
|
suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
|
|
|
{
|
|
|
- unsigned long long old, new;
|
|
|
+ unsigned long long new;
|
|
|
int err;
|
|
|
|
|
|
err = kstrtoull(buf, 10, &new);
|
|
@@ -4840,16 +4870,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
|
|
|
if (mddev->pers == NULL ||
|
|
|
mddev->pers->quiesce == NULL)
|
|
|
goto unlock;
|
|
|
- old = mddev->suspend_lo;
|
|
|
+ mddev_suspend(mddev);
|
|
|
mddev->suspend_lo = new;
|
|
|
- if (new >= old)
|
|
|
- /* Shrinking suspended region */
|
|
|
- mddev->pers->quiesce(mddev, 2);
|
|
|
- else {
|
|
|
- /* Expanding suspended region - need to wait */
|
|
|
- mddev->pers->quiesce(mddev, 1);
|
|
|
- mddev->pers->quiesce(mddev, 0);
|
|
|
- }
|
|
|
+ mddev_resume(mddev);
|
|
|
+
|
|
|
err = 0;
|
|
|
unlock:
|
|
|
mddev_unlock(mddev);
|
|
@@ -4867,7 +4891,7 @@ suspend_hi_show(struct mddev *mddev, char *page)
|
|
|
static ssize_t
|
|
|
suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
|
|
|
{
|
|
|
- unsigned long long old, new;
|
|
|
+ unsigned long long new;
|
|
|
int err;
|
|
|
|
|
|
err = kstrtoull(buf, 10, &new);
|
|
@@ -4880,19 +4904,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
|
|
|
if (err)
|
|
|
return err;
|
|
|
err = -EINVAL;
|
|
|
- if (mddev->pers == NULL ||
|
|
|
- mddev->pers->quiesce == NULL)
|
|
|
+ if (mddev->pers == NULL)
|
|
|
goto unlock;
|
|
|
- old = mddev->suspend_hi;
|
|
|
+
|
|
|
+ mddev_suspend(mddev);
|
|
|
mddev->suspend_hi = new;
|
|
|
- if (new <= old)
|
|
|
- /* Shrinking suspended region */
|
|
|
- mddev->pers->quiesce(mddev, 2);
|
|
|
- else {
|
|
|
- /* Expanding suspended region - need to wait */
|
|
|
- mddev->pers->quiesce(mddev, 1);
|
|
|
- mddev->pers->quiesce(mddev, 0);
|
|
|
- }
|
|
|
+ mddev_resume(mddev);
|
|
|
+
|
|
|
err = 0;
|
|
|
unlock:
|
|
|
mddev_unlock(mddev);
|
|
@@ -5834,8 +5852,14 @@ void md_stop(struct mddev *mddev)
|
|
|
* This is called from dm-raid
|
|
|
*/
|
|
|
__md_stop(mddev);
|
|
|
- if (mddev->bio_set)
|
|
|
+ if (mddev->bio_set) {
|
|
|
bioset_free(mddev->bio_set);
|
|
|
+ mddev->bio_set = NULL;
|
|
|
+ }
|
|
|
+ if (mddev->sync_set) {
|
|
|
+ bioset_free(mddev->sync_set);
|
|
|
+ mddev->sync_set = NULL;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
EXPORT_SYMBOL_GPL(md_stop);
|
|
@@ -6362,7 +6386,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
- if (has_journal) {
|
|
|
+ if (has_journal || mddev->bitmap) {
|
|
|
export_rdev(rdev);
|
|
|
return -EBUSY;
|
|
|
}
|
|
@@ -6618,22 +6642,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
|
|
|
return -ENOENT; /* cannot remove what isn't there */
|
|
|
err = 0;
|
|
|
if (mddev->pers) {
|
|
|
- mddev->pers->quiesce(mddev, 1);
|
|
|
if (fd >= 0) {
|
|
|
struct bitmap *bitmap;
|
|
|
|
|
|
bitmap = bitmap_create(mddev, -1);
|
|
|
+ mddev_suspend(mddev);
|
|
|
if (!IS_ERR(bitmap)) {
|
|
|
mddev->bitmap = bitmap;
|
|
|
err = bitmap_load(mddev);
|
|
|
} else
|
|
|
err = PTR_ERR(bitmap);
|
|
|
- }
|
|
|
- if (fd < 0 || err) {
|
|
|
+ if (err) {
|
|
|
+ bitmap_destroy(mddev);
|
|
|
+ fd = -1;
|
|
|
+ }
|
|
|
+ mddev_resume(mddev);
|
|
|
+ } else if (fd < 0) {
|
|
|
+ mddev_suspend(mddev);
|
|
|
bitmap_destroy(mddev);
|
|
|
- fd = -1; /* make sure to put the file */
|
|
|
+ mddev_resume(mddev);
|
|
|
}
|
|
|
- mddev->pers->quiesce(mddev, 0);
|
|
|
}
|
|
|
if (fd < 0) {
|
|
|
struct file *f = mddev->bitmap_info.file;
|
|
@@ -6735,7 +6763,7 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
|
|
|
|
|
|
void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
|
|
|
{
|
|
|
- WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
|
|
|
+ lockdep_assert_held(&mddev->reconfig_mutex);
|
|
|
|
|
|
if (mddev->external_size)
|
|
|
return;
|
|
@@ -6917,8 +6945,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
|
|
|
mddev->bitmap_info.default_offset;
|
|
|
mddev->bitmap_info.space =
|
|
|
mddev->bitmap_info.default_space;
|
|
|
- mddev->pers->quiesce(mddev, 1);
|
|
|
bitmap = bitmap_create(mddev, -1);
|
|
|
+ mddev_suspend(mddev);
|
|
|
if (!IS_ERR(bitmap)) {
|
|
|
mddev->bitmap = bitmap;
|
|
|
rv = bitmap_load(mddev);
|
|
@@ -6926,7 +6954,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
|
|
|
rv = PTR_ERR(bitmap);
|
|
|
if (rv)
|
|
|
bitmap_destroy(mddev);
|
|
|
- mddev->pers->quiesce(mddev, 0);
|
|
|
+ mddev_resume(mddev);
|
|
|
} else {
|
|
|
/* remove the bitmap */
|
|
|
if (!mddev->bitmap) {
|
|
@@ -6949,9 +6977,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
|
|
|
mddev->bitmap_info.nodes = 0;
|
|
|
md_cluster_ops->leave(mddev);
|
|
|
}
|
|
|
- mddev->pers->quiesce(mddev, 1);
|
|
|
+ mddev_suspend(mddev);
|
|
|
bitmap_destroy(mddev);
|
|
|
- mddev->pers->quiesce(mddev, 0);
|
|
|
+ mddev_resume(mddev);
|
|
|
mddev->bitmap_info.offset = 0;
|
|
|
}
|
|
|
}
|
|
@@ -7468,8 +7496,8 @@ void md_wakeup_thread(struct md_thread *thread)
|
|
|
{
|
|
|
if (thread) {
|
|
|
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
|
|
|
- if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
|
|
|
- wake_up(&thread->wqueue);
|
|
|
+ set_bit(THREAD_WAKEUP, &thread->flags);
|
|
|
+ wake_up(&thread->wqueue);
|
|
|
}
|
|
|
}
|
|
|
EXPORT_SYMBOL(md_wakeup_thread);
|
|
@@ -8039,7 +8067,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
|
|
|
if (did_change)
|
|
|
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
|
|
wait_event(mddev->sb_wait,
|
|
|
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
|
|
|
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
|
|
|
+ mddev->suspended);
|
|
|
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
|
|
|
percpu_ref_put(&mddev->writes_pending);
|
|
|
return false;
|
|
@@ -8110,7 +8139,6 @@ void md_allow_write(struct mddev *mddev)
|
|
|
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
|
|
/* wait for the dirty state to be recorded in the metadata */
|
|
|
wait_event(mddev->sb_wait,
|
|
|
- !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
|
|
|
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
|
|
} else
|
|
|
spin_unlock(&mddev->lock);
|
|
@@ -8477,16 +8505,19 @@ void md_do_sync(struct md_thread *thread)
|
|
|
} else {
|
|
|
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
|
|
mddev->curr_resync = MaxSector;
|
|
|
- rcu_read_lock();
|
|
|
- rdev_for_each_rcu(rdev, mddev)
|
|
|
- if (rdev->raid_disk >= 0 &&
|
|
|
- mddev->delta_disks >= 0 &&
|
|
|
- !test_bit(Journal, &rdev->flags) &&
|
|
|
- !test_bit(Faulty, &rdev->flags) &&
|
|
|
- !test_bit(In_sync, &rdev->flags) &&
|
|
|
- rdev->recovery_offset < mddev->curr_resync)
|
|
|
- rdev->recovery_offset = mddev->curr_resync;
|
|
|
- rcu_read_unlock();
|
|
|
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
|
|
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
|
|
|
+ rcu_read_lock();
|
|
|
+ rdev_for_each_rcu(rdev, mddev)
|
|
|
+ if (rdev->raid_disk >= 0 &&
|
|
|
+ mddev->delta_disks >= 0 &&
|
|
|
+ !test_bit(Journal, &rdev->flags) &&
|
|
|
+ !test_bit(Faulty, &rdev->flags) &&
|
|
|
+ !test_bit(In_sync, &rdev->flags) &&
|
|
|
+ rdev->recovery_offset < mddev->curr_resync)
|
|
|
+ rdev->recovery_offset = mddev->curr_resync;
|
|
|
+ rcu_read_unlock();
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
skip:
|
|
@@ -8813,6 +8844,16 @@ void md_check_recovery(struct mddev *mddev)
|
|
|
unlock:
|
|
|
wake_up(&mddev->sb_wait);
|
|
|
mddev_unlock(mddev);
|
|
|
+ } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
|
|
|
+ /* Write superblock - thread that called mddev_suspend()
|
|
|
+ * holds reconfig_mutex for us.
|
|
|
+ */
|
|
|
+ set_bit(MD_UPDATING_SB, &mddev->flags);
|
|
|
+ smp_mb__after_atomic();
|
|
|
+ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
|
|
|
+ md_update_sb(mddev, 0);
|
|
|
+ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
|
|
|
+ wake_up(&mddev->sb_wait);
|
|
|
}
|
|
|
}
|
|
|
EXPORT_SYMBOL(md_check_recovery);
|