7 жил өмнө · 8b9b6f2554
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,15 +188,6 @@ struct scrub_ctx {
 
				 	refcount_t              refs;
			
 
				 };
			
 
				 
			
 
				-struct scrub_fixup_nodatasum {
			
 
				-	struct scrub_ctx	*sctx;
			
 
				-	struct btrfs_device	*dev;
			
 
				-	u64			logical;
			
 
				-	struct btrfs_root	*root;
			
 
				-	struct btrfs_work	work;
			
 
				-	int			mirror_num;
			
 
				-};
			
 
				-
			
 
				 struct scrub_warning {
			
 
				 	struct btrfs_path	*path;
			
 
				 	u64			extent_item_size;
			
@@ -215,8 +206,6 @@ struct full_stripe_lock {
 
				 
			
 
				 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
			
 
				 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
			
 
				-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
			
 
				-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
			
 
				 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
			
 
				 static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
			
 
				 				     struct scrub_block *sblocks_for_recheck);
			
@@ -531,60 +520,6 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * used for workers that require transaction commits (i.e., for the
			
 
				- * NOCOW case)
			
 
				- */
			
 
				-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
			
 
				-{
			
 
				-	struct btrfs_fs_info *fs_info = sctx->fs_info;
			
 
				-
			
 
				-	refcount_inc(&sctx->refs);
			
 
				-	/*
			
 
				-	 * increment scrubs_running to prevent cancel requests from
			
 
				-	 * completing as long as a worker is running. we must also
			
 
				-	 * increment scrubs_paused to prevent deadlocking on pause
			
 
				-	 * requests used for transactions commits (as the worker uses a
			
 
				-	 * transaction context). it is safe to regard the worker
			
 
				-	 * as paused for all matters practical. effectively, we only
			
 
				-	 * avoid cancellation requests from completing.
			
 
				-	 */
			
 
				-	mutex_lock(&fs_info->scrub_lock);
			
 
				-	atomic_inc(&fs_info->scrubs_running);
			
 
				-	atomic_inc(&fs_info->scrubs_paused);
			
 
				-	mutex_unlock(&fs_info->scrub_lock);
			
 
				-
			
 
				-	/*
			
 
				-	 * check if @scrubs_running=@scrubs_paused condition
			
 
				-	 * inside wait_event() is not an atomic operation.
			
 
				-	 * which means we may inc/dec @scrub_running/paused
			
 
				-	 * at any time. Let's wake up @scrub_pause_wait as
			
 
				-	 * much as we can to let commit transaction blocked less.
			
 
				-	 */
			
 
				-	wake_up(&fs_info->scrub_pause_wait);
			
 
				-
			
 
				-	atomic_inc(&sctx->workers_pending);
			
 
				-}
			
 
				-
			
 
				-/* used for workers that require transaction commits */
			
 
				-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
			
 
				-{
			
 
				-	struct btrfs_fs_info *fs_info = sctx->fs_info;
			
 
				-
			
 
				-	/*
			
 
				-	 * see scrub_pending_trans_workers_inc() why we're pretending
			
 
				-	 * to be paused in the scrub counters
			
 
				-	 */
			
 
				-	mutex_lock(&fs_info->scrub_lock);
			
 
				-	atomic_dec(&fs_info->scrubs_running);
			
 
				-	atomic_dec(&fs_info->scrubs_paused);
			
 
				-	mutex_unlock(&fs_info->scrub_lock);
			
 
				-	atomic_dec(&sctx->workers_pending);
			
 
				-	wake_up(&fs_info->scrub_pause_wait);
			
 
				-	wake_up(&sctx->list_wait);
			
 
				-	scrub_put_ctx(sctx);
			
 
				-}
			
 
				-
			
 
				 static void scrub_free_csums(struct scrub_ctx *sctx)
			
 
				 {
			
 
				 	while (!list_empty(&sctx->csum_list)) {
			
@@ -858,194 +793,6 @@ out:
 
				 	btrfs_free_path(path);
			
 
				 }
			
 
				 
			
 
				-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
			
 
				-{
			
 
				-	struct page *page = NULL;
			
 
				-	unsigned long index;
			
 
				-	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
			
 
				-	int ret;
			
 
				-	int corrected = 0;
			
 
				-	struct btrfs_key key;
			
 
				-	struct inode *inode = NULL;
			
 
				-	struct btrfs_fs_info *fs_info;
			
 
				-	u64 end = offset + PAGE_SIZE - 1;
			
 
				-	struct btrfs_root *local_root;
			
 
				-	int srcu_index;
			
 
				-
			
 
				-	key.objectid = root;
			
 
				-	key.type = BTRFS_ROOT_ITEM_KEY;
			
 
				-	key.offset = (u64)-1;
			
 
				-
			
 
				-	fs_info = fixup->root->fs_info;
			
 
				-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
			
 
				-
			
 
				-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
			
 
				-	if (IS_ERR(local_root)) {
			
 
				-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
			
 
				-		return PTR_ERR(local_root);
			
 
				-	}
			
 
				-
			
 
				-	key.type = BTRFS_INODE_ITEM_KEY;
			
 
				-	key.objectid = inum;
			
 
				-	key.offset = 0;
			
 
				-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
			
 
				-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
			
 
				-	if (IS_ERR(inode))
			
 
				-		return PTR_ERR(inode);
			
 
				-
			
 
				-	index = offset >> PAGE_SHIFT;
			
 
				-
			
 
				-	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
			
 
				-	if (!page) {
			
 
				-		ret = -ENOMEM;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	if (PageUptodate(page)) {
			
 
				-		if (PageDirty(page)) {
			
 
				-			/*
			
 
				-			 * we need to write the data to the defect sector. the
			
 
				-			 * data that was in that sector is not in memory,
			
 
				-			 * because the page was modified. we must not write the
			
 
				-			 * modified page to that sector.
			
 
				-			 *
			
 
				-			 * TODO: what could be done here: wait for the delalloc
			
 
				-			 *       runner to write out that page (might involve
			
 
				-			 *       COW) and see whether the sector is still
			
 
				-			 *       referenced afterwards.
			
 
				-			 *
			
 
				-			 * For the meantime, we'll treat this error
			
 
				-			 * incorrectable, although there is a chance that a
			
 
				-			 * later scrub will find the bad sector again and that
			
 
				-			 * there's no dirty page in memory, then.
			
 
				-			 */
			
 
				-			ret = -EIO;
			
 
				-			goto out;
			
 
				-		}
			
 
				-		ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
			
 
				-					fixup->logical, page,
			
 
				-					offset - page_offset(page),
			
 
				-					fixup->mirror_num);
			
 
				-		unlock_page(page);
			
 
				-		corrected = !ret;
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * we need to get good data first. the general readpage path
			
 
				-		 * will call repair_io_failure for us, we just have to make
			
 
				-		 * sure we read the bad mirror.
			
 
				-		 */
			
 
				-		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
			
 
				-					EXTENT_DAMAGED);
			
 
				-		if (ret) {
			
 
				-			/* set_extent_bits should give proper error */
			
 
				-			WARN_ON(ret > 0);
			
 
				-			if (ret > 0)
			
 
				-				ret = -EFAULT;
			
 
				-			goto out;
			
 
				-		}
			
 
				-
			
 
				-		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
			
 
				-						btrfs_get_extent,
			
 
				-						fixup->mirror_num);
			
 
				-		wait_on_page_locked(page);
			
 
				-
			
 
				-		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
			
 
				-						end, EXTENT_DAMAGED, 0, NULL);
			
 
				-		if (!corrected)
			
 
				-			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
			
 
				-						EXTENT_DAMAGED);
			
 
				-	}
			
 
				-
			
 
				-out:
			
 
				-	if (page)
			
 
				-		put_page(page);
			
 
				-
			
 
				-	iput(inode);
			
 
				-
			
 
				-	if (ret < 0)
			
 
				-		return ret;
			
 
				-
			
 
				-	if (ret == 0 && corrected) {
			
 
				-		/*
			
 
				-		 * we only need to call readpage for one of the inodes belonging
			
 
				-		 * to this extent. so make iterate_extent_inodes stop
			
 
				-		 */
			
 
				-		return 1;
			
 
				-	}
			
 
				-
			
 
				-	return -EIO;
			
 
				-}
			
 
				-
			
 
				-static void scrub_fixup_nodatasum(struct btrfs_work *work)
			
 
				-{
			
 
				-	struct btrfs_fs_info *fs_info;
			
 
				-	int ret;
			
 
				-	struct scrub_fixup_nodatasum *fixup;
			
 
				-	struct scrub_ctx *sctx;
			
 
				-	struct btrfs_trans_handle *trans = NULL;
			
 
				-	struct btrfs_path *path;
			
 
				-	int uncorrectable = 0;
			
 
				-
			
 
				-	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
			
 
				-	sctx = fixup->sctx;
			
 
				-	fs_info = fixup->root->fs_info;
			
 
				-
			
 
				-	path = btrfs_alloc_path();
			
 
				-	if (!path) {
			
 
				-		spin_lock(&sctx->stat_lock);
			
 
				-		++sctx->stat.malloc_errors;
			
 
				-		spin_unlock(&sctx->stat_lock);
			
 
				-		uncorrectable = 1;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	trans = btrfs_join_transaction(fixup->root);
			
 
				-	if (IS_ERR(trans)) {
			
 
				-		uncorrectable = 1;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * the idea is to trigger a regular read through the standard path. we
			
 
				-	 * read a page from the (failed) logical address by specifying the
			
 
				-	 * corresponding copynum of the failed sector. thus, that readpage is
			
 
				-	 * expected to fail.
			
 
				-	 * that is the point where on-the-fly error correction will kick in
			
 
				-	 * (once it's finished) and rewrite the failed sector if a good copy
			
 
				-	 * can be found.
			
 
				-	 */
			
 
				-	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
			
 
				-					  scrub_fixup_readpage, fixup, false);
			
 
				-	if (ret < 0) {
			
 
				-		uncorrectable = 1;
			
 
				-		goto out;
			
 
				-	}
			
 
				-	WARN_ON(ret != 1);
			
 
				-
			
 
				-	spin_lock(&sctx->stat_lock);
			
 
				-	++sctx->stat.corrected_errors;
			
 
				-	spin_unlock(&sctx->stat_lock);
			
 
				-
			
 
				-out:
			
 
				-	if (trans && !IS_ERR(trans))
			
 
				-		btrfs_end_transaction(trans);
			
 
				-	if (uncorrectable) {
			
 
				-		spin_lock(&sctx->stat_lock);
			
 
				-		++sctx->stat.uncorrectable_errors;
			
 
				-		spin_unlock(&sctx->stat_lock);
			
 
				-		btrfs_dev_replace_stats_inc(
			
 
				-			&fs_info->dev_replace.num_uncorrectable_read_errors);
			
 
				-		btrfs_err_rl_in_rcu(fs_info,
			
 
				-		    "unable to fixup (nodatasum) error at logical %llu on dev %s",
			
 
				-			fixup->logical, rcu_str_deref(fixup->dev->name));
			
 
				-	}
			
 
				-
			
 
				-	btrfs_free_path(path);
			
 
				-	kfree(fixup);
			
 
				-
			
 
				-	scrub_pending_trans_workers_dec(sctx);
			
 
				-}
			
 
				-
			
 
				 static inline void scrub_get_recover(struct scrub_recover *recover)
			
 
				 {
			
 
				 	refcount_inc(&recover->refs);
			
@@ -1239,42 +986,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * NOTE: Even for nodatasum case, it's still possible that it's a
			
 
				-	 * compressed data extent, thus scrub_fixup_nodatasum(), which write
			
 
				-	 * inode page cache onto disk, could cause serious data corruption.
			
 
				-	 *
			
 
				-	 * So here we could only read from disk, and hope our recovery could
			
 
				-	 * reach disk before the newer write.
			
 
				-	 */
			
 
				-	if (0 && !is_metadata && !have_csum) {
			
 
				-		struct scrub_fixup_nodatasum *fixup_nodatasum;
			
 
				-
			
 
				-		WARN_ON(sctx->is_dev_replace);
			
 
				-
			
 
				-		/*
			
 
				-		 * !is_metadata and !have_csum, this means that the data
			
 
				-		 * might not be COWed, that it might be modified
			
 
				-		 * concurrently. The general strategy to work on the
			
 
				-		 * commit root does not help in the case when COW is not
			
 
				-		 * used.
			
 
				-		 */
			
 
				-		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
			
 
				-		if (!fixup_nodatasum)
			
 
				-			goto did_not_correct_error;
			
 
				-		fixup_nodatasum->sctx = sctx;
			
 
				-		fixup_nodatasum->dev = dev;
			
 
				-		fixup_nodatasum->logical = logical;
			
 
				-		fixup_nodatasum->root = fs_info->extent_root;
			
 
				-		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
			
 
				-		scrub_pending_trans_workers_inc(sctx);
			
 
				-		btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
			
 
				-				scrub_fixup_nodatasum, NULL, NULL);
			
 
				-		btrfs_queue_work(fs_info->scrub_workers,
			
 
				-				 &fixup_nodatasum->work);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				 	/*
			
 
				 	 * now build and submit the bios for the other mirrors, check
			
 
				 	 * checksums.