9 лет назад · 8e018c21da
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@ struct r5l_log {
 
				 	spinlock_t no_space_stripes_lock;
			
 
				 
			
 
				 	bool need_cache_flush;
			
 
				-	bool in_teardown;
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
				 
			
 
				 	mddev = log->rdev->mddev;
			
 
				 	/*
			
 
				-	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
			
 
				-	 * wait for this thread to finish. This thread waits for
			
 
				-	 * MD_CHANGE_PENDING clear, which is supposed to be done in
			
 
				-	 * md_check_recovery(). md_check_recovery() tries to get
			
 
				-	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
			
 
				-	 * md_check_recovery() fails, so the PENDING never get cleared. The
			
 
				-	 * in_teardown check workaround this issue.
			
 
				+	 * Discard could zero data, so before discard we must make sure
			
 
				+	 * superblock is updated to new log tail. Updating superblock (either
			
 
				+	 * directly call md_update_sb() or depend on md thread) must hold
			
 
				+	 * reconfig mutex. On the other hand, raid5_quiesce is called with
			
 
				+	 * reconfig_mutex held. The first step of raid5_quiesce() is waiting
			
 
				+	 * for all IO finish, hence waiting for reclaim thread, while reclaim
			
 
				+	 * thread is calling this function and waiting for reconfig mutex. So
			
 
				+	 * there is a deadlock. We work around this issue with a trylock.
			
 
				+	 * FIXME: we could miss discard if we can't take reconfig mutex
			
 
				 	 */
			
 
				-	if (!log->in_teardown) {
			
 
				-		set_mask_bits(&mddev->flags, 0,
			
 
				-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
			
 
				-		md_wakeup_thread(mddev->thread);
			
 
				-		wait_event(mddev->sb_wait,
			
 
				-			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
			
 
				-			log->in_teardown);
			
 
				-		/*
			
 
				-		 * r5l_quiesce could run after in_teardown check and hold
			
 
				-		 * mutex first. Superblock might get updated twice.
			
 
				-		 */
			
 
				-		if (log->in_teardown)
			
 
				-			md_update_sb(mddev, 1);
			
 
				-	} else {
			
 
				-		WARN_ON(!mddev_is_locked(mddev));
			
 
				-		md_update_sb(mddev, 1);
			
 
				-	}
			
 
				+	set_mask_bits(&mddev->flags, 0,
			
 
				+		BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
			
 
				+	if (!mddev_trylock(mddev))
			
 
				+		return;
			
 
				+	md_update_sb(mddev, 1);
			
 
				+	mddev_unlock(mddev);
			
 
				 
			
 
				 	/* discard IO error really doesn't matter, ignore it */
			
 
				 	if (log->last_checkpoint < end) {
			
@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 
				 	if (!log || state == 2)
			
 
				 		return;
			
 
				 	if (state == 0) {
			
 
				-		log->in_teardown = 0;
			
 
				 		/*
			
 
				 		 * This is a special case for hotadd. In suspend, the array has
			
 
				 		 * no journal. In resume, journal is initialized as well as the
			
@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
 
				 		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
			
 
				 					log->rdev->mddev, "reclaim");
			
 
				 	} else if (state == 1) {
			
 
				-		/*
			
 
				-		 * at this point all stripes are finished, so io_unit is at
			
 
				-		 * least in STRIPE_END state
			
 
				-		 */
			
 
				-		log->in_teardown = 1;
			
 
				 		/* make sure r5l_write_super_and_discard_space exits */
			
 
				 		mddev = log->rdev->mddev;
			
 
				 		wake_up(&mddev->sb_wait);