@@ -74,6 +74,7 @@ struct scrub_page {
 	struct scrub_block	*sblock;
 	struct page		*page;
 	struct btrfs_device	*dev;
+	struct list_head	list;
 	u64			flags;  /* extent flags */
 	u64			generation;
 	u64			logical;
@@ -114,14 +115,52 @@ struct scrub_block {
 	atomic_t		outstanding_pages;
 	atomic_t		ref_count; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
+	struct scrub_parity	*sparity;
 	struct {
 		unsigned int	header_error:1;
 		unsigned int	checksum_error:1;
 		unsigned int	no_io_error_seen:1;
 		unsigned int	generation_error:1; /* also sets header_error */
+
+		/* The following is for the data used to check parity */
+		/* It is for the data with checksum */
+		unsigned int	data_corrected:1;
 	};
 };
 
+/* Used for the chunks with parity stripe such as RAID5/6 */
+struct scrub_parity {
+	struct scrub_ctx	*sctx;
+
+	struct btrfs_device	*scrub_dev;
+
+	u64			logic_start;
+
+	u64			logic_end;
+
+	int			nsectors;
+
+	int			stripe_len;
+
+	atomic_t		ref_count;
+
+	struct list_head	spages;
+
+	/* Work of parity check and repair */
+	struct btrfs_work	work;
+
+	/* Mark the parity blocks which have data */
+	unsigned long		*dbitmap;
+
+	/*
+	 * Mark the parity blocks which have data, but an error happens
+	 * when the data is read or checked
+	 */
+	unsigned long		*ebitmap;
+
+	unsigned long		bitmap[0];
+};
+
 struct scrub_wr_ctx {
 	struct scrub_bio *wr_curr_bio;
 	struct btrfs_device *tgtdev;
@@ -227,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
 static void scrub_page_get(struct scrub_page *spage);
 static void scrub_page_put(struct scrub_page *spage);
+static void scrub_parity_get(struct scrub_parity *sparity);
+static void scrub_parity_put(struct scrub_parity *sparity);
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 				    struct scrub_page *spage);
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -943,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 		 */
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.unverified_errors++;
+		sblock_to_check->data_corrected = 1;
 		spin_unlock(&sctx->stat_lock);
 
 		if (sctx->is_dev_replace)
@@ -1203,6 +1245,7 @@ nodatasum_case:
 corrected_error:
 			spin_lock(&sctx->stat_lock);
 			sctx->stat.corrected_errors++;
+			sblock_to_check->data_corrected = 1;
 			spin_unlock(&sctx->stat_lock);
 			printk_ratelimited_in_rcu(KERN_ERR
 				"BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1644,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
 {
 	int page_num;
 
+	/*
+	 * This block is used for the check of the parity on the source device,
+	 * so the data needn't be written into the destination device.
+	 */
+	if (sblock->sparity)
+		return;
+
 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		int ret;
 
@@ -2025,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
 	if (atomic_dec_and_test(&sblock->ref_count)) {
 		int i;
 
+		if (sblock->sparity)
+			scrub_parity_put(sblock->sparity);
+
 		for (i = 0; i < sblock->page_count; i++)
 			scrub_page_put(sblock->pagev[i]);
 		kfree(sblock);
@@ -2282,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	scrub_pending_bio_dec(sctx);
 }
 
+static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
+				       unsigned long *bitmap,
+				       u64 start, u64 len)
+{
+	int offset;
+	int nsectors;
+	int sectorsize = sparity->sctx->dev_root->sectorsize;
+
+	if (len >= sparity->stripe_len) {
+		bitmap_set(bitmap, 0, sparity->nsectors);
+		return;
+	}
+
+	start -= sparity->logic_start;
+	offset = (int)do_div(start, sparity->stripe_len);
+	offset /= sectorsize;
+	nsectors = (int)len / sectorsize;
+
+	if (offset + nsectors <= sparity->nsectors) {
+		bitmap_set(bitmap, offset, nsectors);
+		return;
+	}
+
+	bitmap_set(bitmap, offset, sparity->nsectors - offset);
+	bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
+}
+
+static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
+						   u64 start, u64 len)
+{
+	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
+}
+
+static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
+						  u64 start, u64 len)
+{
+	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
+}
+
 static void scrub_block_complete(struct scrub_block *sblock)
 {
+	int corrupted = 0;
+
 	if (!sblock->no_io_error_seen) {
+		corrupted = 1;
 		scrub_handle_errored_block(sblock);
 	} else {
 		/*
@@ -2292,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
 		 * dev replace case, otherwise write here in dev replace
 		 * case.
 		 */
-		if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
+		corrupted = scrub_checksum(sblock);
+		if (!corrupted && sblock->sctx->is_dev_replace)
 			scrub_write_block_to_dev_replace(sblock);
 	}
+
+	if (sblock->sparity && corrupted && !sblock->data_corrected) {
+		u64 start = sblock->pagev[0]->logical;
+		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
+			  PAGE_SIZE;
+
+		scrub_parity_mark_sectors_error(sblock->sparity,
+						start, end - start);
+	}
 }
 
 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -2386,6 +2491,132 @@ behind_scrub_pages:
 	return 0;
 }
 
+static int scrub_pages_for_parity(struct scrub_parity *sparity,
+				  u64 logical, u64 len,
+				  u64 physical, struct btrfs_device *dev,
+				  u64 flags, u64 gen, int mirror_num, u8 *csum)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct scrub_block *sblock;
+	int index;
+
+	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+	if (!sblock) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+
+	/* one ref inside this function, plus one for each page added to
+	 * a bio later on */
+	atomic_set(&sblock->ref_count, 1);
+	sblock->sctx = sctx;
+	sblock->no_io_error_seen = 1;
+	sblock->sparity = sparity;
+	scrub_parity_get(sparity);
+
+	for (index = 0; len > 0; index++) {
+		struct scrub_page *spage;
+		u64 l = min_t(u64, len, PAGE_SIZE);
+
+		spage = kzalloc(sizeof(*spage), GFP_NOFS);
+		if (!spage) {
+leave_nomem:
+			spin_lock(&sctx->stat_lock);
+			sctx->stat.malloc_errors++;
+			spin_unlock(&sctx->stat_lock);
+			scrub_block_put(sblock);
+			return -ENOMEM;
+		}
+		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		/* For scrub block */
+		scrub_page_get(spage);
+		sblock->pagev[index] = spage;
+		/* For scrub parity */
+		scrub_page_get(spage);
+		list_add_tail(&spage->list, &sparity->spages);
+		spage->sblock = sblock;
+		spage->dev = dev;
+		spage->flags = flags;
+		spage->generation = gen;
+		spage->logical = logical;
+		spage->physical = physical;
+		spage->mirror_num = mirror_num;
+		if (csum) {
+			spage->have_csum = 1;
+			memcpy(spage->csum, csum, sctx->csum_size);
+		} else {
+			spage->have_csum = 0;
+		}
+		sblock->page_count++;
+		spage->page = alloc_page(GFP_NOFS);
+		if (!spage->page)
+			goto leave_nomem;
+		len -= l;
+		logical += l;
+		physical += l;
+	}
+
+	WARN_ON(sblock->page_count == 0);
+	for (index = 0; index < sblock->page_count; index++) {
+		struct scrub_page *spage = sblock->pagev[index];
+		int ret;
+
+		ret = scrub_add_page_to_rd_bio(sctx, spage);
+		if (ret) {
+			scrub_block_put(sblock);
+			return ret;
+		}
+	}
+
+	/* last one frees, either here or in bio completion for last page */
+	scrub_block_put(sblock);
+	return 0;
+}
+
+static int scrub_extent_for_parity(struct scrub_parity *sparity,
+				   u64 logical, u64 len,
+				   u64 physical, struct btrfs_device *dev,
+				   u64 flags, u64 gen, int mirror_num)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	int ret;
+	u8 csum[BTRFS_CSUM_SIZE];
+	u32 blocksize;
+
+	if (flags & BTRFS_EXTENT_FLAG_DATA) {
+		blocksize = sctx->sectorsize;
+	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		blocksize = sctx->nodesize;
+	} else {
+		blocksize = sctx->sectorsize;
+		WARN_ON(1);
+	}
+
+	while (len) {
+		u64 l = min_t(u64, len, blocksize);
+		int have_csum = 0;
+
+		if (flags & BTRFS_EXTENT_FLAG_DATA) {
+			/* push csums to sbio */
+			have_csum = scrub_find_csum(sctx, logical, l, csum);
+			if (have_csum == 0)
+				goto skip;
+		}
+		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
+					     flags, gen, mirror_num,
+					     have_csum ? csum : NULL);
+skip:
+		if (ret)
+			return ret;
+		len -= l;
+		logical += l;
+		physical += l;
+	}
+	return 0;
+}
+
 /*
  * Given a physical address, this will calculate it's
  * logical offset. if this is a parity stripe, it will return
@@ -2394,7 +2625,8 @@ behind_scrub_pages:
  * return 0 if it is a data stripe, 1 means parity stripe.
  */
 static int get_raid56_logic_offset(u64 physical, int num,
-				   struct map_lookup *map, u64 *offset)
+				   struct map_lookup *map, u64 *offset,
+				   u64 *stripe_start)
 {
 	int i;
 	int j = 0;
@@ -2405,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
 	last_offset = (physical - map->stripes[num].physical) *
 		      nr_data_stripes(map);
+	if (stripe_start)
+		*stripe_start = last_offset;
+
 	*offset = last_offset;
 	for (i = 0; i < nr_data_stripes(map); i++) {
 		*offset = last_offset + i * map->stripe_len;
@@ -2427,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
 	return 1;
 }
 
+static void scrub_free_parity(struct scrub_parity *sparity)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct scrub_page *curr, *next;
+	int nbits;
+
+	nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
+	if (nbits) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.read_errors += nbits;
+		sctx->stat.uncorrectable_errors += nbits;
+		spin_unlock(&sctx->stat_lock);
+	}
+
+	list_for_each_entry_safe(curr, next, &sparity->spages, list) {
+		list_del_init(&curr->list);
+		scrub_page_put(curr);
+	}
+
+	kfree(sparity);
+}
+
+static void scrub_parity_bio_endio(struct bio *bio, int error)
+{
+	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
+	struct scrub_ctx *sctx = sparity->sctx;
+
+	if (error)
+		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+			  sparity->nsectors);
+
+	scrub_free_parity(sparity);
+	scrub_pending_bio_dec(sctx);
+	bio_put(bio);
+}
+
+static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+{
+	struct scrub_ctx *sctx = sparity->sctx;
+	struct bio *bio;
+	struct btrfs_raid_bio *rbio;
+	struct scrub_page *spage;
+	struct btrfs_bio *bbio = NULL;
+	u64 *raid_map = NULL;
+	u64 length;
+	int ret;
+
+	if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
+			   sparity->nsectors))
+		goto out;
+
+	length = sparity->logic_end - sparity->logic_start + 1;
+	ret = btrfs_map_sblock(sctx->dev_root->fs_info, REQ_GET_READ_MIRRORS,
+			       sparity->logic_start,
+			       &length, &bbio, 0, &raid_map);
+	if (ret || !bbio || !raid_map)
+		goto bbio_out;
+
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
+	if (!bio)
+		goto bbio_out;
+
+	bio->bi_iter.bi_sector = sparity->logic_start >> 9;
+	bio->bi_private = sparity;
+	bio->bi_end_io = scrub_parity_bio_endio;
+
+	rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
+					      raid_map, length,
+					      sparity->scrub_dev,
+					      sparity->dbitmap,
+					      sparity->nsectors);
+	if (!rbio)
+		goto rbio_out;
+
+	list_for_each_entry(spage, &sparity->spages, list)
+		raid56_parity_add_scrub_pages(rbio, spage->page,
+					      spage->logical);
+
+	scrub_pending_bio_inc(sctx);
+	raid56_parity_submit_scrub_rbio(rbio);
+	return;
+
+rbio_out:
+	bio_put(bio);
+bbio_out:
+	kfree(bbio);
+	kfree(raid_map);
+	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+		  sparity->nsectors);
+	spin_lock(&sctx->stat_lock);
+	sctx->stat.malloc_errors++;
+	spin_unlock(&sctx->stat_lock);
+out:
+	scrub_free_parity(sparity);
+}
+
+static inline int scrub_calc_parity_bitmap_len(int nsectors)
+{
+	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+}
+
+static void scrub_parity_get(struct scrub_parity *sparity)
+{
+	atomic_inc(&sparity->ref_count);
+}
+
+static void scrub_parity_put(struct scrub_parity *sparity)
+{
+	if (!atomic_dec_and_test(&sparity->ref_count))
+		return;
+
+	scrub_parity_check_and_repair(sparity);
+}
+
+static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
+						  struct map_lookup *map,
+						  struct btrfs_device *sdev,
+						  struct btrfs_path *path,
+						  u64 logic_start,
+						  u64 logic_end)
+{
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_root *csum_root = fs_info->csum_root;
+	struct btrfs_extent_item *extent;
+	u64 flags;
+	int ret;
+	int slot;
+	struct extent_buffer *l;
+	struct btrfs_key key;
+	u64 generation;
+	u64 extent_logical;
+	u64 extent_physical;
+	u64 extent_len;
+	struct btrfs_device *extent_dev;
+	struct scrub_parity *sparity;
+	int nsectors;
+	int bitmap_len;
+	int extent_mirror_num;
+	int stop_loop = 0;
+
+	nsectors = map->stripe_len / root->sectorsize;
+	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
+	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
+			  GFP_NOFS);
+	if (!sparity) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.malloc_errors++;
+		spin_unlock(&sctx->stat_lock);
+		return -ENOMEM;
+	}
+
+	sparity->stripe_len = map->stripe_len;
+	sparity->nsectors = nsectors;
+	sparity->sctx = sctx;
+	sparity->scrub_dev = sdev;
+	sparity->logic_start = logic_start;
+	sparity->logic_end = logic_end;
+	atomic_set(&sparity->ref_count, 1);
+	INIT_LIST_HEAD(&sparity->spages);
+	sparity->dbitmap = sparity->bitmap;
+	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
+
+	ret = 0;
+	while (logic_start < logic_end) {
+		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+			key.type = BTRFS_METADATA_ITEM_KEY;
+		else
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.objectid = logic_start;
+		key.offset = (u64)-1;
+
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+
+		if (ret > 0) {
+			ret = btrfs_previous_extent_item(root, path, 0);
+			if (ret < 0)
+				goto out;
+			if (ret > 0) {
+				btrfs_release_path(path);
+				ret = btrfs_search_slot(NULL, root, &key,
+							path, 0, 0);
+				if (ret < 0)
+					goto out;
+			}
+		}
+
+		stop_loop = 0;
+		while (1) {
+			u64 bytes;
+
+			l = path->nodes[0];
+			slot = path->slots[0];
+			if (slot >= btrfs_header_nritems(l)) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret == 0)
+					continue;
+				if (ret < 0)
+					goto out;
+
+				stop_loop = 1;
+				break;
+			}
+			btrfs_item_key_to_cpu(l, &key, slot);
+
+			if (key.type == BTRFS_METADATA_ITEM_KEY)
+				bytes = root->nodesize;
+			else
+				bytes = key.offset;
+
+			if (key.objectid + bytes <= logic_start)
+				goto next;
+
+			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+			    key.type != BTRFS_METADATA_ITEM_KEY)
+				goto next;
+
+			if (key.objectid > logic_end) {
+				stop_loop = 1;
+				break;
+			}
+
+			while (key.objectid >= logic_start + map->stripe_len)
+				logic_start += map->stripe_len;
+
+			extent = btrfs_item_ptr(l, slot,
+						struct btrfs_extent_item);
+			flags = btrfs_extent_flags(l, extent);
+			generation = btrfs_extent_generation(l, extent);
+
+			if (key.objectid < logic_start &&
+			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+				btrfs_err(fs_info,
+					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+					  key.objectid, logic_start);
+				goto next;
+			}
+again:
+			extent_logical = key.objectid;
+			extent_len = bytes;
+
+			if (extent_logical < logic_start) {
+				extent_len -= logic_start - extent_logical;
+				extent_logical = logic_start;
+			}
+
+			if (extent_logical + extent_len >
+			    logic_start + map->stripe_len)
+				extent_len = logic_start + map->stripe_len -
+					     extent_logical;
+
+			scrub_parity_mark_sectors_data(sparity, extent_logical,
+						       extent_len);
+
+			scrub_remap_extent(fs_info, extent_logical,
+					   extent_len, &extent_physical,
+					   &extent_dev,
+					   &extent_mirror_num);
+
+			ret = btrfs_lookup_csums_range(csum_root,
+						       extent_logical,
+						       extent_logical + extent_len - 1,
+						       &sctx->csum_list, 1);
+			if (ret)
+				goto out;
+
+			ret = scrub_extent_for_parity(sparity, extent_logical,
+						      extent_len,
+						      extent_physical,
+						      extent_dev, flags,
+						      generation,
+						      extent_mirror_num);
+			if (ret)
+				goto out;
+
+			scrub_free_csums(sctx);
+			if (extent_logical + extent_len <
+			    key.objectid + bytes) {
+				logic_start += map->stripe_len;
+
+				if (logic_start >= logic_end) {
+					stop_loop = 1;
+					break;
+				}
+
+				if (logic_start < key.objectid + bytes) {
+					cond_resched();
+					goto again;
+				}
+			}
+next:
+			path->slots[0]++;
+		}
+
+		btrfs_release_path(path);
+
+		if (stop_loop)
+			break;
+
+		logic_start += map->stripe_len;
+	}
+out:
+	if (ret < 0)
+		scrub_parity_mark_sectors_error(sparity, logic_start,
+						logic_end - logic_start + 1);
+	scrub_parity_put(sparity);
+	scrub_submit(sctx);
+	mutex_lock(&sctx->wr_ctx.wr_lock);
+	scrub_wr_submit(sctx);
+	mutex_unlock(&sctx->wr_ctx.wr_lock);
+
+	btrfs_release_path(path);
+	return ret < 0 ? ret : 0;
+}
+
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 					   struct map_lookup *map,
 					   struct btrfs_device *scrub_dev,
 					   int num, u64 base, u64 length,
 					   int is_dev_replace)
 {
-	struct btrfs_path *path;
+	struct btrfs_path *path, *ppath;
 	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_root *csum_root = fs_info->csum_root;
@@ -2460,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 extent_logical;
 	u64 extent_physical;
 	u64 extent_len;
+	u64 stripe_logical;
+	u64 stripe_end;
 	struct btrfs_device *extent_dev;
 	int extent_mirror_num;
 	int stop_loop = 0;
@@ -2485,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		mirror_num = num % map->num_stripes + 1;
 	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 				BTRFS_BLOCK_GROUP_RAID6)) {
-		get_raid56_logic_offset(physical, num, map, &offset);
+		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 		mirror_num = 1;
 	} else {
@@ -2497,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (!path)
 		return -ENOMEM;
 
+	ppath = btrfs_alloc_path();
+	if (!ppath) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
 	/*
 	 * work on commit root. The related disk blocks are static as
 	 * long as COW is applied. This means, it is save to rewrite
@@ -2515,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 			 BTRFS_BLOCK_GROUP_RAID6)) {
 		get_raid56_logic_offset(physical_end, num,
-					map, &logic_end);
+					map, &logic_end, NULL);
 		logic_end += base;
 	} else {
 		logic_end = logical + increment * nstripes;
@@ -2562,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
 				 BTRFS_BLOCK_GROUP_RAID6)) {
 			ret = get_raid56_logic_offset(physical, num,
-					map, &logical);
+					map, &logical, &stripe_logical);
 			logical += base;
-			if (ret)
+			if (ret) {
+				stripe_logical += base;
+				stripe_end = stripe_logical + increment - 1;
+				ret = scrub_raid56_parity(sctx, map, scrub_dev,
+							  ppath, stripe_logical,
+							  stripe_end);
+				if (ret)
+					goto out;
 				goto skip;
+			}
 		}
 		/*
 		 * canceled?
@@ -2716,13 +3284,25 @@ again:
 				 * loop until we find next data stripe
 				 * or we have finished all stripes.
 				 */
-				do {
-					physical += map->stripe_len;
-					ret = get_raid56_logic_offset(
-							physical, num,
-							map, &logical);
-					logical += base;
-				} while (physical < physical_end && ret);
+loop:
+				physical += map->stripe_len;
+				ret = get_raid56_logic_offset(physical,
+						num, map, &logical,
+						&stripe_logical);
+				logical += base;
+
+				if (ret && physical < physical_end) {
+					stripe_logical += base;
+					stripe_end = stripe_logical +
+						     increment - 1;
+					ret = scrub_raid56_parity(sctx,
+							map, scrub_dev, ppath,
+							stripe_logical,
+							stripe_end);
+					if (ret)
+						goto out;
+					goto loop;
+				}
 			} else {
 				physical += map->stripe_len;
 				logical += increment;
@@ -2763,6 +3343,7 @@ out:
 
 	blk_finish_plug(&plug);
 	btrfs_free_path(path);
+	btrfs_free_path(ppath);
 	return ret < 0 ? ret : 0;
 }
 