浏览代码

Merge tag 'for-4.16-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "We have a few assorted fixes, some of them show up during fstests so I
  gave them more testing"

* tag 'for-4.16-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: Fix use-after-free when cleaning up fs_devs with a single stale device
  Btrfs: fix null pointer dereference when replacing missing device
  btrfs: remove spurious WARN_ON(ref->count < 0) in find_parent_nodes
  btrfs: Ignore errors from btrfs_qgroup_trace_extent_post
  Btrfs: fix unexpected -EEXIST when creating new inode
  Btrfs: fix use-after-free on root->orphan_block_rsv
  Btrfs: fix btrfs_evict_inode to handle abnormal inodes correctly
  Btrfs: fix extent state leak from tree log
  Btrfs: fix crash due to not cleaning up tree log block's dirty bits
  Btrfs: fix deadlock in run_delalloc_nocow
Linus Torvalds 7 年之前
父节点
当前提交
da370f1d63
共有 7 个文件被更改,包括 80 次插入和 21 次删除
  1. 10 1
      fs/btrfs/backref.c
  2. 2 1
      fs/btrfs/delayed-ref.c
  3. 4 0
      fs/btrfs/extent-tree.c
  4. 26 15
      fs/btrfs/inode.c
  5. 7 2
      fs/btrfs/qgroup.c
  6. 30 2
      fs/btrfs/tree-log.c
  7. 1 0
      fs/btrfs/volumes.c

+ 10 - 1
fs/btrfs/backref.c

@@ -1264,7 +1264,16 @@ again:
 	while (node) {
 	while (node) {
 		ref = rb_entry(node, struct prelim_ref, rbnode);
 		ref = rb_entry(node, struct prelim_ref, rbnode);
 		node = rb_next(&ref->rbnode);
 		node = rb_next(&ref->rbnode);
-		WARN_ON(ref->count < 0);
+		/*
+		 * ref->count < 0 can happen here if there are delayed
+		 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
+		 * prelim_ref_insert() relies on this when merging
+		 * identical refs to keep the overall count correct.
+		 * prelim_ref_insert() will merge only those refs
+		 * which compare identically.  Any refs having
+		 * e.g. different offsets would not be merged,
+		 * and would retain their original ref->count < 0.
+		 */
 		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			if (sc && sc->root_objectid &&
 			if (sc && sc->root_objectid &&
 			    ref->root_id != sc->root_objectid) {
 			    ref->root_id != sc->root_objectid) {

+ 2 - 1
fs/btrfs/delayed-ref.c

@@ -821,7 +821,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	spin_unlock(&delayed_refs->lock);
 	spin_unlock(&delayed_refs->lock);
 
 
 	if (qrecord_inserted)
 	if (qrecord_inserted)
-		return btrfs_qgroup_trace_extent_post(fs_info, record);
+		btrfs_qgroup_trace_extent_post(fs_info, record);
+
 	return 0;
 	return 0;
 
 
 free_head_ref:
 free_head_ref:

+ 4 - 0
fs/btrfs/extent-tree.c

@@ -2147,6 +2147,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 			u64 bytes;
 			u64 bytes;
 			struct request_queue *req_q;
 			struct request_queue *req_q;
 
 
+			if (!stripe->dev->bdev) {
+				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
+				continue;
+			}
 			req_q = bdev_get_queue(stripe->dev->bdev);
 			req_q = bdev_get_queue(stripe->dev->bdev);
 			if (!blk_queue_discard(req_q))
 			if (!blk_queue_discard(req_q))
 				continue;
 				continue;

+ 26 - 15
fs/btrfs/inode.c

@@ -1335,8 +1335,11 @@ next_slot:
 		leaf = path->nodes[0];
 		leaf = path->nodes[0];
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
 			ret = btrfs_next_leaf(root, path);
 			ret = btrfs_next_leaf(root, path);
-			if (ret < 0)
+			if (ret < 0) {
+				if (cow_start != (u64)-1)
+					cur_offset = cow_start;
 				goto error;
 				goto error;
+			}
 			if (ret > 0)
 			if (ret > 0)
 				break;
 				break;
 			leaf = path->nodes[0];
 			leaf = path->nodes[0];
@@ -3385,6 +3388,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 		ret = btrfs_orphan_reserve_metadata(trans, inode);
 		ret = btrfs_orphan_reserve_metadata(trans, inode);
 		ASSERT(!ret);
 		ASSERT(!ret);
 		if (ret) {
 		if (ret) {
+			/*
+			 * The decrement doesn't need the spin_lock, because
+			 * ->orphan_block_rsv is released only when
+			 * ->orphan_inodes is zero.
+			 */
 			atomic_dec(&root->orphan_inodes);
 			atomic_dec(&root->orphan_inodes);
 			clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 			clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 				  &inode->runtime_flags);
 				  &inode->runtime_flags);
@@ -3399,12 +3407,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 	if (insert >= 1) {
 	if (insert >= 1) {
 		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
 		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
 		if (ret) {
 		if (ret) {
-			atomic_dec(&root->orphan_inodes);
 			if (reserve) {
 			if (reserve) {
 				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 					  &inode->runtime_flags);
 					  &inode->runtime_flags);
 				btrfs_orphan_release_metadata(inode);
 				btrfs_orphan_release_metadata(inode);
 			}
 			}
+			/*
+			 * btrfs_orphan_commit_root may race with us and set
+			 * ->orphan_block_rsv to zero. To avoid that, decrease
+			 * ->orphan_inodes only after everything else is done.
+			 */
+			atomic_dec(&root->orphan_inodes);
 			if (ret != -EEXIST) {
 			if (ret != -EEXIST) {
 				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 					  &inode->runtime_flags);
 					  &inode->runtime_flags);
@@ -3436,28 +3449,26 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
 {
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_root *root = inode->root;
 	int delete_item = 0;
 	int delete_item = 0;
-	int release_rsv = 0;
 	int ret = 0;
 	int ret = 0;
 
 
-	spin_lock(&root->orphan_lock);
 	if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 	if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 			       &inode->runtime_flags))
 			       &inode->runtime_flags))
 		delete_item = 1;
 		delete_item = 1;
 
 
+	if (delete_item && trans)
+		ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+
 	if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 	if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 			       &inode->runtime_flags))
 			       &inode->runtime_flags))
-		release_rsv = 1;
-	spin_unlock(&root->orphan_lock);
+		btrfs_orphan_release_metadata(inode);
 
 
-	if (delete_item) {
+	/*
+	 * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
+	 * to zero. To avoid that, decrease ->orphan_inodes only after
+	 * everything else is done.
+	 */
+	if (delete_item)
 		atomic_dec(&root->orphan_inodes);
 		atomic_dec(&root->orphan_inodes);
-		if (trans)
-			ret = btrfs_del_orphan_item(trans, root,
-						    btrfs_ino(inode));
-	}
-
-	if (release_rsv)
-		btrfs_orphan_release_metadata(inode);
 
 
 	return ret;
 	return ret;
 }
 }
@@ -5281,7 +5292,7 @@ void btrfs_evict_inode(struct inode *inode)
 	trace_btrfs_inode_evict(inode);
 	trace_btrfs_inode_evict(inode);
 
 
 	if (!root) {
 	if (!root) {
-		kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+		clear_inode(inode);
 		return;
 		return;
 	}
 	}
 
 

+ 7 - 2
fs/btrfs/qgroup.c

@@ -1442,8 +1442,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
 	int ret;
 	int ret;
 
 
 	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
 	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
-	if (ret < 0)
-		return ret;
+	if (ret < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+		btrfs_warn(fs_info,
+"error accounting new delayed refs extent (err code: %d), quota inconsistent",
+			ret);
+		return 0;
+	}
 
 
 	/*
 	/*
 	 * Here we don't need to get the lock of
 	 * Here we don't need to get the lock of

+ 30 - 2
fs/btrfs/tree-log.c

@@ -29,6 +29,7 @@
 #include "hash.h"
 #include "hash.h"
 #include "compression.h"
 #include "compression.h"
 #include "qgroup.h"
 #include "qgroup.h"
+#include "inode-map.h"
 
 
 /* magic values for the inode_only field in btrfs_log_inode:
 /* magic values for the inode_only field in btrfs_log_inode:
  *
  *
@@ -2472,6 +2473,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 					clean_tree_block(fs_info, next);
 					clean_tree_block(fs_info, next);
 					btrfs_wait_tree_block_writeback(next);
 					btrfs_wait_tree_block_writeback(next);
 					btrfs_tree_unlock(next);
 					btrfs_tree_unlock(next);
+				} else {
+					if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+						clear_extent_buffer_dirty(next);
 				}
 				}
 
 
 				WARN_ON(root_owner !=
 				WARN_ON(root_owner !=
@@ -2552,6 +2556,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 					clean_tree_block(fs_info, next);
 					clean_tree_block(fs_info, next);
 					btrfs_wait_tree_block_writeback(next);
 					btrfs_wait_tree_block_writeback(next);
 					btrfs_tree_unlock(next);
 					btrfs_tree_unlock(next);
+				} else {
+					if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+						clear_extent_buffer_dirty(next);
 				}
 				}
 
 
 				WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
 				WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
@@ -2630,6 +2637,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 				clean_tree_block(fs_info, next);
 				clean_tree_block(fs_info, next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 				btrfs_tree_unlock(next);
+			} else {
+				if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
+					clear_extent_buffer_dirty(next);
 			}
 			}
 
 
 			WARN_ON(log->root_key.objectid !=
 			WARN_ON(log->root_key.objectid !=
@@ -3018,13 +3028,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 
 
 	while (1) {
 	while (1) {
 		ret = find_first_extent_bit(&log->dirty_log_pages,
 		ret = find_first_extent_bit(&log->dirty_log_pages,
-				0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+				0, &start, &end,
+				EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT,
 				NULL);
 				NULL);
 		if (ret)
 		if (ret)
 			break;
 			break;
 
 
 		clear_extent_bits(&log->dirty_log_pages, start, end,
 		clear_extent_bits(&log->dirty_log_pages, start, end,
-				  EXTENT_DIRTY | EXTENT_NEW);
+				  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
 	}
 	}
 
 
 	/*
 	/*
@@ -5677,6 +5688,23 @@ again:
 						      path);
 						      path);
 		}
 		}
 
 
+		if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
+			struct btrfs_root *root = wc.replay_dest;
+
+			btrfs_release_path(path);
+
+			/*
+			 * We have just replayed everything, and the highest
+			 * objectid of fs roots has probably changed if
+			 * some inode items got replayed.
+			 *
+			 * root->objectid_mutex is not acquired because log
+			 * replay can only happen during mount.
+			 */
+			ret = btrfs_find_highest_objectid(root,
+						  &root->highest_objectid);
+		}
+
 		key.offset = found_key.offset - 1;
 		key.offset = found_key.offset - 1;
 		wc.replay_dest->log_root = NULL;
 		wc.replay_dest->log_root = NULL;
 		free_extent_buffer(log->node);
 		free_extent_buffer(log->node);

+ 1 - 0
fs/btrfs/volumes.c

@@ -645,6 +645,7 @@ static void btrfs_free_stale_devices(const char *path,
 				btrfs_sysfs_remove_fsid(fs_devs);
 				btrfs_sysfs_remove_fsid(fs_devs);
 				list_del(&fs_devs->list);
 				list_del(&fs_devs->list);
 				free_fs_devices(fs_devs);
 				free_fs_devices(fs_devs);
+				break;
 			} else {
 			} else {
 				fs_devs->num_devices--;
 				fs_devs->num_devices--;
 				list_del(&dev->dev_list);
 				list_del(&dev->dev_list);