@@ -223,18 +223,14 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
         return slot;
 }
 
-static void return_io(struct bio *return_bi)
+static void return_io(struct bio_list *return_bi)
 {
-        struct bio *bi = return_bi;
-
-        while (bi) {
-
-                return_bi = bi->bi_next;
-                bi->bi_next = NULL;
+        struct bio *bi;
+
+        while ((bi = bio_list_pop(return_bi)) != NULL) {
                 bi->bi_iter.bi_size = 0;
                 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                          bi, 0);
                 bio_endio(bi);
-                bi = return_bi;
         }
 }
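
The conversion above is easier to read with the bio_list semantics in mind. Below is a minimal userspace sketch of that head/tail list, assuming nothing beyond the C standard library (struct node and struct nlist are invented stand-ins, not kernel types): add appends in O(1) via the tail pointer and pop detaches from the head, which is what lets the new return_io() drop the open-coded bi_next juggling. Unlike the old push-to-head list, it also completes bios in FIFO order.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct node { int id; struct node *next; };
struct nlist { struct node *head, *tail; };   /* cf. struct bio_list */

static void nlist_add(struct nlist *l, struct node *n)
{
        /* append at the tail, as bio_list_add() does */
        n->next = NULL;
        if (l->tail)
                l->tail->next = n;
        else
                l->head = n;
        l->tail = n;
}

static struct node *nlist_pop(struct nlist *l)
{
        /* detach from the head, as bio_list_pop() does */
        struct node *n = l->head;

        if (n) {
                l->head = n->next;
                if (!l->head)
                        l->tail = NULL;
                n->next = NULL;
        }
        return n;
}

int main(void)
{
        struct nlist l = { NULL, NULL };      /* cf. BIO_EMPTY_LIST */
        struct node a = { 1, NULL }, b = { 2, NULL };
        struct node *n;

        nlist_add(&l, &a);
        nlist_add(&l, &b);
        /* same loop shape as the new return_io() */
        while ((n = nlist_pop(&l)) != NULL)
                printf("complete bio %d\n", n->id);
        assert(l.head == NULL && l.tail == NULL);
        return 0;
}
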
@@ -1177,7 +1173,7 @@ async_copy_data(int frombio, struct bio *bio, struct page **page,
 static void ops_complete_biofill(void *stripe_head_ref)
 {
         struct stripe_head *sh = stripe_head_ref;
-        struct bio *return_bi = NULL;
+        struct bio_list return_bi = BIO_EMPTY_LIST;
         int i;
 
         pr_debug("%s: stripe %llu\n", __func__,
@@ -1201,17 +1197,15 @@ static void ops_complete_biofill(void *stripe_head_ref)
                         while (rbi && rbi->bi_iter.bi_sector <
                                 dev->sector + STRIPE_SECTORS) {
                                 rbi2 = r5_next_bio(rbi, dev->sector);
-                                if (!raid5_dec_bi_active_stripes(rbi)) {
-                                        rbi->bi_next = return_bi;
-                                        return_bi = rbi;
-                                }
+                                if (!raid5_dec_bi_active_stripes(rbi))
+                                        bio_list_add(&return_bi, rbi);
                                 rbi = rbi2;
                         }
                 }
         }
         clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
-        return_io(return_bi);
+        return_io(&return_bi);
 
         set_bit(STRIPE_HANDLE, &sh->state);
         release_stripe(sh);
@@ -2517,6 +2511,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
         set_bit(Blocked, &rdev->flags);
         set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        set_bit(MD_CHANGE_PENDING, &mddev->flags);
         printk(KERN_ALERT
                "md/raid:%s: Disk failure on %s, disabling device.\n"
                "md/raid:%s: Operation continuing on %d devices.\n",
@@ -3069,7 +3064,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 static void
 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                 struct stripe_head_state *s, int disks,
-                                struct bio **return_bi)
+                                struct bio_list *return_bi)
 {
         int i;
         BUG_ON(sh->batch_head);
@@ -3114,8 +3109,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                         bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
+                                bio_list_add(return_bi, bi);
                         }
                         bi = nextbi;
                 }
@@ -3139,8 +3133,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                         bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
+                                bio_list_add(return_bi, bi);
                         }
                         bi = bi2;
                 }
@@ -3163,10 +3156,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                         r5_next_bio(bi, sh->dev[i].sector);
 
                         bi->bi_error = -EIO;
-                        if (!raid5_dec_bi_active_stripes(bi)) {
-                                bi->bi_next = *return_bi;
-                                *return_bi = bi;
-                        }
+                        if (!raid5_dec_bi_active_stripes(bi))
+                                bio_list_add(return_bi, bi);
                         bi = nextbi;
                 }
         }
@@ -3445,7 +3436,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
  * never LOCKED, so we don't need to test 'failed' directly.
  */
 static void handle_stripe_clean_event(struct r5conf *conf,
-        struct stripe_head *sh, int disks, struct bio **return_bi)
+        struct stripe_head *sh, int disks, struct bio_list *return_bi)
 {
         int i;
         struct r5dev *dev;
@@ -3479,8 +3470,7 @@ returnbi:
                                         wbi2 = r5_next_bio(wbi, dev->sector);
                                         if (!raid5_dec_bi_active_stripes(wbi)) {
                                                 md_write_end(conf->mddev);
-                                                wbi->bi_next = *return_bi;
-                                                *return_bi = wbi;
+                                                bio_list_add(return_bi, wbi);
                                         }
                                         wbi = wbi2;
                                 }
@@ -4613,7 +4603,15 @@ finish:
                         md_wakeup_thread(conf->mddev->thread);
         }
 
-        return_io(s.return_bi);
+        if (!bio_list_empty(&s.return_bi)) {
+                if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+                        spin_lock_irq(&conf->device_lock);
+                        bio_list_merge(&conf->return_bi, &s.return_bi);
+                        spin_unlock_irq(&conf->device_lock);
+                        md_wakeup_thread(conf->mddev->thread);
+                } else
+                        return_io(&s.return_bi);
+        }
 
         clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
 }
@@ -4672,12 +4670,12 @@ static int raid5_congested(struct mddev *mddev, int bits)
 
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
+        struct r5conf *conf = mddev->private;
         sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
-        unsigned int chunk_sectors = mddev->chunk_sectors;
+        unsigned int chunk_sectors;
         unsigned int bio_sectors = bio_sectors(bio);
 
-        if (mddev->new_chunk_sectors < mddev->chunk_sectors)
-                chunk_sectors = mddev->new_chunk_sectors;
+        chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
         return chunk_sectors >=
                 ((sector & (chunk_sectors - 1)) + bio_sectors);
 }
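
The test above relies on chunk_sectors being a power of two: sector & (chunk_sectors - 1) is the bio's offset within its chunk, so the bio stays inside one chunk exactly when that offset plus the bio's length fits. Taking min() of the current and previous chunk size makes the answer safe in both geometries while a reshape is in flight. A standalone illustration, with invented values:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

static bool fits_in_chunk(sector_t sector, unsigned int bio_sectors,
                          unsigned int chunk_sectors)
{
        /* chunk_sectors must be a power of two for the mask to work */
        return chunk_sectors >=
                ((sector & (chunk_sectors - 1)) + bio_sectors);
}

int main(void)
{
        /* 64KiB chunks = 128 sectors of 512 bytes */
        printf("%d\n", fits_in_chunk(100, 28, 128));  /* 1: ends exactly at 128 */
        printf("%d\n", fits_in_chunk(100, 29, 128));  /* 0: crosses the boundary */
        return 0;
}
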
@@ -5325,6 +5323,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
         sector_t stripe_addr;
         int reshape_sectors;
         struct list_head stripes;
+        sector_t retn;
 
         if (sector_nr == 0) {
                 /* If restarting in the middle, skip the initial sectors */
@@ -5332,6 +5331,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                     conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                         sector_nr = raid5_size(mddev, 0, 0)
                                 - conf->reshape_progress;
+                } else if (mddev->reshape_backwards &&
+                           conf->reshape_progress == MaxSector) {
+                        /* shouldn't happen, but just in case, finish up.*/
+                        sector_nr = MaxSector;
                 } else if (!mddev->reshape_backwards &&
                            conf->reshape_progress > 0)
                         sector_nr = conf->reshape_progress;
@@ -5340,7 +5343,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                         mddev->curr_resync_completed = sector_nr;
                         sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                         *skipped = 1;
-                        return sector_nr;
+                        retn = sector_nr;
+                        goto finish;
                 }
         }
 
@@ -5348,10 +5352,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
          * If old and new chunk sizes differ, we need to process the
          * largest of these
          */
-        if (mddev->new_chunk_sectors > mddev->chunk_sectors)
-                reshape_sectors = mddev->new_chunk_sectors;
-        else
-                reshape_sectors = mddev->chunk_sectors;
+
+        reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
 
         /* We update the metadata at least every 10 seconds, or when
          * the data about to be copied would over-write the source of
@@ -5366,11 +5368,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
         safepos = conf->reshape_safe;
         sector_div(safepos, data_disks);
         if (mddev->reshape_backwards) {
-                writepos -= min_t(sector_t, reshape_sectors, writepos);
+                BUG_ON(writepos < reshape_sectors);
+                writepos -= reshape_sectors;
                 readpos += reshape_sectors;
                 safepos += reshape_sectors;
         } else {
                 writepos += reshape_sectors;
+                /* readpos and safepos are worst-case calculations.
+                 * A negative number is overly pessimistic, and causes
+                 * obvious problems for unsigned storage. So clip to 0.
+                 */
                 readpos -= min_t(sector_t, reshape_sectors, readpos);
                 safepos -= min_t(sector_t, reshape_sectors, safepos);
         }
@@ -5513,7 +5520,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
          * then we need to write out the superblock.
          */
         sector_nr += reshape_sectors;
-        if ((sector_nr - mddev->curr_resync_completed) * 2
+        retn = reshape_sectors;
+finish:
+        if (mddev->curr_resync_completed > mddev->resync_max ||
+            (sector_nr - mddev->curr_resync_completed) * 2
             >= mddev->resync_max - mddev->curr_resync_completed) {
                 /* Cannot proceed until we've updated the superblock... */
                 wait_event(conf->wait_for_overlap,
@@ -5538,7 +5548,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
         }
 ret:
-        return reshape_sectors;
+        return retn;
 }
 
 static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
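
A note on the arithmetic: reshape_request() now advances in units of the larger of the two chunk sizes. Chunk sizes are powers of two, so the larger is always a whole multiple of the smaller, and each step therefore covers an integral number of stripes in both the old and the new geometry. A toy illustration with invented numbers:

#include <stdio.h>

int main(void)
{
        unsigned int prev_chunk = 128, new_chunk = 512;  /* in sectors */
        unsigned int reshape_sectors =
                prev_chunk > new_chunk ? prev_chunk : new_chunk;

        /* 512 is a multiple of both 128 and 512, so advancing
         * writepos/readpos/safepos by reshape_sectors keeps both
         * layouts chunk-aligned at every step.
         */
        printf("unit=%u old-chunks=%u new-chunks=%u\n",
               reshape_sectors,
               reshape_sectors / prev_chunk,
               reshape_sectors / new_chunk);
        return 0;
}
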
@@ -5794,6 +5804,18 @@ static void raid5d(struct md_thread *thread)
 
         md_check_recovery(mddev);
 
+        if (!bio_list_empty(&conf->return_bi) &&
+            !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+                struct bio_list tmp = BIO_EMPTY_LIST;
+                spin_lock_irq(&conf->device_lock);
+                if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+                        bio_list_merge(&tmp, &conf->return_bi);
+                        bio_list_init(&conf->return_bi);
+                }
+                spin_unlock_irq(&conf->device_lock);
+                return_io(&tmp);
+        }
+
         blk_start_plug(&plug);
         handled = 0;
         spin_lock_irq(&conf->device_lock);
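
This drain pairs with the hunk in handle_stripe() above that parks completions on conf->return_bi while MD_CHANGE_PENDING is set: the flag is tested cheaply without the lock, re-tested under conf->device_lock before the list is stolen, and the bios are completed only after the lock is dropped. Below is a userspace sketch of that shape; pending, lock and parked[] are illustrative stand-ins for MD_CHANGE_PENDING, conf->device_lock and conf->return_bi, not kernel APIs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending = true;        /* cf. MD_CHANGE_PENDING */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int parked[16], nparked;           /* cf. conf->return_bi */

static void park_completion(int id)
{
        pthread_mutex_lock(&lock);
        parked[nparked++] = id;
        pthread_mutex_unlock(&lock);
}

static void daemon_drain(void)
{
        int local[16], n = 0, i;

        if (nparked == 0 || atomic_load(&pending))
                return;                    /* cheap unlocked pre-check */
        pthread_mutex_lock(&lock);
        if (!atomic_load(&pending)) {      /* re-check under the lock */
                for (i = 0; i < nparked; i++)
                        local[n++] = parked[i];
                nparked = 0;
        }
        pthread_mutex_unlock(&lock);
        for (i = 0; i < n; i++)            /* complete outside the lock */
                printf("end bio %d\n", local[i]);
}

int main(void)
{
        park_completion(1);
        park_completion(2);
        daemon_drain();                    /* no-op: still pending */
        atomic_store(&pending, false);     /* superblock now written */
        daemon_drain();                    /* completes 1 and 2, in order */
        return 0;
}
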
@@ -6234,8 +6256,8 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
                 /* size is defined by the smallest of previous and new size */
                 raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
 
-        sectors &= ~((sector_t)mddev->chunk_sectors - 1);
-        sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
+        sectors &= ~((sector_t)conf->chunk_sectors - 1);
+        sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
         return sectors * (raid_disks - conf->max_degraded);
 }
 
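
The two masks round the array size down to a multiple of both chunk sizes; since chunk sizes are powers of two, this amounts to rounding down to the larger of the two. A standalone demonstration with invented values:

#include <stdio.h>

typedef unsigned long long sector_t;

int main(void)
{
        sector_t sectors = 1000003;
        unsigned int chunk = 128, prev_chunk = 512;

        sectors &= ~((sector_t)chunk - 1);      /* 999936 */
        sectors &= ~((sector_t)prev_chunk - 1); /* still 999936 = 1953 * 512 */
        printf("%llu\n", sectors);
        return 0;
}
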
@@ -6453,6 +6475,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         INIT_LIST_HEAD(&conf->hold_list);
         INIT_LIST_HEAD(&conf->delayed_list);
         INIT_LIST_HEAD(&conf->bitmap_list);
+        bio_list_init(&conf->return_bi);
         init_llist_head(&conf->released_stripes);
         atomic_set(&conf->active_stripes, 0);
         atomic_set(&conf->preread_active_stripes, 0);
@@ -6542,6 +6565,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         if (conf->reshape_progress != MaxSector) {
                 conf->prev_chunk_sectors = mddev->chunk_sectors;
                 conf->prev_algo = mddev->layout;
+        } else {
+                conf->prev_chunk_sectors = conf->chunk_sectors;
+                conf->prev_algo = conf->algorithm;
         }
 
         conf->min_nr_stripes = NR_STRIPES;
@@ -6661,6 +6687,8 @@ static int run(struct mddev *mddev)
                 sector_t here_new, here_old;
                 int old_disks;
                 int max_degraded = (mddev->level == 6 ? 2 : 1);
+                int chunk_sectors;
+                int new_data_disks;
 
                 if (mddev->new_level != mddev->level) {
                         printk(KERN_ERR "md/raid:%s: unsupported reshape "
@@ -6672,28 +6700,25 @@ static int run(struct mddev *mddev)
                 /* reshape_position must be on a new-stripe boundary, and one
                  * further up in new geometry must map after here in old
                  * geometry.
+                 * If the chunk sizes are different, then as we perform reshape
+                 * in units of the largest of the two, reshape_position needs
+                 * to be a multiple of the largest chunk size times new data disks.
                  */
                 here_new = mddev->reshape_position;
-                if (sector_div(here_new, mddev->new_chunk_sectors *
-                               (mddev->raid_disks - max_degraded))) {
+                chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
+                new_data_disks = mddev->raid_disks - max_degraded;
+                if (sector_div(here_new, chunk_sectors * new_data_disks)) {
                         printk(KERN_ERR "md/raid:%s: reshape_position not "
                                "on a stripe boundary\n", mdname(mddev));
                         return -EINVAL;
                 }
-                reshape_offset = here_new * mddev->new_chunk_sectors;
+                reshape_offset = here_new * chunk_sectors;
                 /* here_new is the stripe we will write to */
                 here_old = mddev->reshape_position;
-                sector_div(here_old, mddev->chunk_sectors *
-                           (old_disks-max_degraded));
+                sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
                 /* here_old is the first stripe that we might need to read
                  * from */
                 if (mddev->delta_disks == 0) {
-                        if ((here_new * mddev->new_chunk_sectors !=
-                             here_old * mddev->chunk_sectors)) {
-                                printk(KERN_ERR "md/raid:%s: reshape position is"
-                                       " confused - aborting\n", mdname(mddev));
-                                return -EINVAL;
-                        }
                         /* We cannot be sure it is safe to start an in-place
                          * reshape. It is only safe if user-space is monitoring
                          * and taking constant backups.
@@ -6712,10 +6737,10 @@ static int run(struct mddev *mddev)
                                 return -EINVAL;
                         }
                 } else if (mddev->reshape_backwards
-                           ? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
-                              here_old * mddev->chunk_sectors)
-                           : (here_new * mddev->new_chunk_sectors >=
-                              here_old * mddev->chunk_sectors + (-min_offset_diff))) {
+                           ? (here_new * chunk_sectors + min_offset_diff <=
+                              here_old * chunk_sectors)
+                           : (here_new * chunk_sectors >=
+                              here_old * chunk_sectors + (-min_offset_diff))) {
                         /* Reading from the same stripe as writing to - bad */
                         printk(KERN_ERR "md/raid:%s: reshape_position too early for "
                                "auto-recovery - aborting.\n",
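
For reference, the boundary check above leans on sector_div(), which divides in place and returns the remainder, so a non-zero result means reshape_position is not a multiple of (largest chunk size * new data disks). A plain C sketch; the sector_div() here is a userspace stand-in for the kernel macro and the numbers are invented:

#include <stdio.h>

typedef unsigned long long sector_t;

/* userspace stand-in for the kernel's sector_div() */
static unsigned int sector_div(sector_t *n, unsigned int base)
{
        unsigned int rem = (unsigned int)(*n % base);

        *n /= base;
        return rem;
}

int main(void)
{
        sector_t here_new = 1048576;               /* reshape_position */
        unsigned int chunk_sectors = 512, new_data_disks = 4;

        if (sector_div(&here_new, chunk_sectors * new_data_disks))
                printf("not on a stripe boundary\n");
        else
                printf("stripe %llu\n", here_new); /* 1048576 / 2048 = 512 */
        return 0;
}
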
@@ -6967,7 +6992,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
         int i;
 
         seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
-                mddev->chunk_sectors / 2, mddev->layout);
+                conf->chunk_sectors / 2, mddev->layout);
         seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
         for (i = 0; i < conf->raid_disks; i++)
                 seq_printf (seq, "%s",
@@ -7173,7 +7198,9 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
          * worth it.
          */
         sector_t newsize;
-        sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+        struct r5conf *conf = mddev->private;
+
+        sectors &= ~((sector_t)conf->chunk_sectors - 1);
         newsize = raid5_size(mddev, sectors, mddev->raid_disks);
         if (mddev->external_size &&
             mddev->array_sectors > newsize)
@@ -7412,6 +7439,7 @@ static void end_reshape(struct r5conf *conf)
                         rdev->data_offset = rdev->new_data_offset;
                 smp_wmb();
                 conf->reshape_progress = MaxSector;
+                conf->mddev->reshape_position = MaxSector;
                 spin_unlock_irq(&conf->device_lock);
                 wake_up(&conf->wait_for_overlap);
 