9 years ago · 73d32ce21e
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
															 	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
														
 
															 	if (!ifp) {
														
 
															-		kfree(fspath);
														
 
															+		vfree(fspath);
														
 
															 		return ERR_PTR(-ENOMEM);
														
 
															 	}
														
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -743,8 +743,11 @@ out:
 
															 static struct {
														
 
															 	struct list_head idle_ws;
														
 
															 	spinlock_t ws_lock;
														
 
															-	int num_ws;
														
 
															-	atomic_t alloc_ws;
														
 
															+	/* Number of free workspaces */
														
 
															+	int free_ws;
														
 
															+	/* Total number of allocated workspaces */
														
 
															+	atomic_t total_ws;
														
 
															+	/* Waiters for a free workspace */
														
 
															 	wait_queue_head_t ws_wait;
														
 
															 } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
														
@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
 
															 	int i;
														
 
															 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
														
 
															+		struct list_head *workspace;
														
 
															+
														
 
															 		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
														
 
															 		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
														
 
															-		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
														
 
															+		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
														
 
															 		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
														
 
															+
														
 
															+		/*
														
 
															+		 * Preallocate one workspace for each compression type so
														
 
															+		 * we can guarantee forward progress in the worst case
														
 
															+		 */
														
 
															+		workspace = btrfs_compress_op[i]->alloc_workspace();
														
 
															+		if (IS_ERR(workspace)) {
														
 
															+			printk(KERN_WARNING
														
 
															+	"BTRFS: cannot preallocate compression workspace, will try later");
														
 
															+		} else {
														
 
															+			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
														
 
															+			btrfs_comp_ws[i].free_ws = 1;
														
 
															+			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
														
 
															+		}
														
 
															 	}
														
 
															 }
														
 
															 /*
														
 
															- * this finds an available workspace or allocates a new one
														
 
															- * ERR_PTR is returned if things go bad.
														
 
															+ * This finds an available workspace or allocates a new one.
														
 
															+ * If it's not possible to allocate a new one, waits until there's one.
														
 
															+ * Preallocation provides a forward progress guarantee, so we do not return
														
 
															+ * errors.
														
 
															  */
														
 
															 static struct list_head *find_workspace(int type)
														
 
															 {
														
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
 
															 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
														
 
															 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
														
 
															-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
														
 
															+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
														
 
															 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
														
 
															-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
														
 
															+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
														
 
															 again:
														
 
															 	spin_lock(ws_lock);
														
 
															 	if (!list_empty(idle_ws)) {
														
 
															 		workspace = idle_ws->next;
														
 
															 		list_del(workspace);
														
 
															-		(*num_ws)--;
														
 
															+		(*free_ws)--;
														
 
															 		spin_unlock(ws_lock);
														
 
															 		return workspace;
														
 
															 	}
														
 
															-	if (atomic_read(alloc_ws) > cpus) {
														
 
															+	if (atomic_read(total_ws) > cpus) {
														
 
															 		DEFINE_WAIT(wait);
														
 
															 		spin_unlock(ws_lock);
														
 
															 		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
														
 
															-		if (atomic_read(alloc_ws) > cpus && !*num_ws)
														
 
															+		if (atomic_read(total_ws) > cpus && !*free_ws)
														
 
															 			schedule();
														
 
															 		finish_wait(ws_wait, &wait);
														
 
															 		goto again;
														
 
															 	}
														
 
															-	atomic_inc(alloc_ws);
														
 
															+	atomic_inc(total_ws);
														
 
															 	spin_unlock(ws_lock);
														
 
															 	workspace = btrfs_compress_op[idx]->alloc_workspace();
														
 
															 	if (IS_ERR(workspace)) {
														
 
															-		atomic_dec(alloc_ws);
														
 
															+		atomic_dec(total_ws);
														
 
															 		wake_up(ws_wait);
														
 
															+
														
 
															+		/*
														
 
															+		 * Do not return the error but go back to waiting. There's a
														
 
															+		 * workspace preallocated for each type and the compression
														
 
															+		 * time is bounded so we get to a workspace eventually. This
														
 
															+		 * makes our caller's life easier.
														
 
															+		 *
														
 
															+		 * To prevent silent and low-probability deadlocks (when the
														
 
															+		 * initial preallocation fails), check if there are any
														
 
															+		 * workspaces at all.
														
 
															+		 */
														
 
															+		if (atomic_read(total_ws) == 0) {
														
 
															+			static DEFINE_RATELIMIT_STATE(_rs,
														
 
															+					/* once per minute */ 60 * HZ,
														
 
															+					/* no burst */ 1);
														
 
															+
														
 
															+			if (__ratelimit(&_rs)) {
														
 
															+				printk(KERN_WARNING
														
 
															+			    "no compression workspaces, low memory, retrying");
														
 
															+			}
														
 
															+		}
														
 
															+		goto again;
														
 
															 	}
														
 
															 	return workspace;
														
 
															 }
														
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
 
															 	int idx = type - 1;
														
 
															 	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
														
 
															 	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
														
 
															-	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
														
 
															+	atomic_t *total_ws		= &btrfs_comp_ws[idx].total_ws;
														
 
															 	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
														
 
															-	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
														
 
															+	int *free_ws			= &btrfs_comp_ws[idx].free_ws;
														
 
															 	spin_lock(ws_lock);
														
 
															-	if (*num_ws < num_online_cpus()) {
														
 
															+	if (*free_ws < num_online_cpus()) {
														
 
															 		list_add(workspace, idle_ws);
														
 
															-		(*num_ws)++;
														
 
															+		(*free_ws)++;
														
 
															 		spin_unlock(ws_lock);
														
 
															 		goto wake;
														
 
															 	}
														
 
															 	spin_unlock(ws_lock);
														
 
															 	btrfs_compress_op[idx]->free_workspace(workspace);
														
 
															-	atomic_dec(alloc_ws);
														
 
															+	atomic_dec(total_ws);
														
 
															 wake:
														
 
															 	/*
														
 
															 	 * Make sure counter is updated before we wake up waiters.
														
@@ -857,7 +900,7 @@ static void free_workspaces(void)
 
															 			workspace = btrfs_comp_ws[i].idle_ws.next;
														
 
															 			list_del(workspace);
														
 
															 			btrfs_compress_op[i]->free_workspace(workspace);
														
 
															-			atomic_dec(&btrfs_comp_ws[i].alloc_ws);
														
 
															+			atomic_dec(&btrfs_comp_ws[i].total_ws);
														
 
															 		}
														
 
															 	}
														
 
															 }
														
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
 
															 	int ret;
														
 
															 	workspace = find_workspace(type);
														
 
															-	if (IS_ERR(workspace))
														
 
															-		return PTR_ERR(workspace);
														
 
															 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
														
 
															 						      start, len, pages,
														
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
 
															 	int ret;
														
 
															 	workspace = find_workspace(type);
														
 
															-	if (IS_ERR(workspace))
														
 
															-		return PTR_ERR(workspace);
														
 
															 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
														
 
															 							 disk_start,
														
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 
															 	int ret;
														
 
															 	workspace = find_workspace(type);
														
 
															-	if (IS_ERR(workspace))
														
 
															-		return PTR_ERR(workspace);
														
 
															 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
														
 
															 						  dest_page, start_byte,
														
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4122,6 +4122,7 @@ void btrfs_test_inode_set_ops(struct inode *inode);
 
															 /* ioctl.c */
														
 
															 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
														
 
															+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
														
 
															 int btrfs_ioctl_get_supported_features(void __user *arg);
														
 
															 void btrfs_update_iflags(struct inode *inode);
														
 
															 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
														
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2517,6 +2517,7 @@ int open_ctree(struct super_block *sb,
 
															 	int num_backups_tried = 0;
														
 
															 	int backup_index = 0;
														
 
															 	int max_active;
														
 
															+	bool cleaner_mutex_locked = false;
														
 
															 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
														
 
															 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
														
@@ -2997,6 +2998,13 @@ retry_root_backup:
 
															 		goto fail_sysfs;
														
 
															 	}
														
 
															+	/*
														
 
															+	 * Hold the cleaner_mutex here so that we don't block
														
 
															+	 * for a long time on btrfs_recover_relocation.  cleaner_kthread
														
 
															+	 * will wait for us to finish mounting the filesystem.
														
 
															+	 */
														
 
															+	mutex_lock(&fs_info->cleaner_mutex);
														
 
															+	cleaner_mutex_locked = true;
														
 
															 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
														
 
															 					       "btrfs-cleaner");
														
 
															 	if (IS_ERR(fs_info->cleaner_kthread))
														
@@ -3056,10 +3064,8 @@ retry_root_backup:
 
															 		ret = btrfs_cleanup_fs_roots(fs_info);
														
 
															 		if (ret)
														
 
															 			goto fail_qgroup;
														
 
															-
														
 
															-		mutex_lock(&fs_info->cleaner_mutex);
														
 
															+		/* We locked cleaner_mutex before creating cleaner_kthread. */
														
 
															 		ret = btrfs_recover_relocation(tree_root);
														
 
															-		mutex_unlock(&fs_info->cleaner_mutex);
														
 
															 		if (ret < 0) {
														
 
															 			printk(KERN_WARNING
														
 
															 			       "BTRFS: failed to recover relocation\n");
														
@@ -3067,6 +3073,8 @@ retry_root_backup:
 
															 			goto fail_qgroup;
														
 
															 		}
														
 
															 	}
														
 
															+	mutex_unlock(&fs_info->cleaner_mutex);
														
 
															+	cleaner_mutex_locked = false;
														
 
															 	location.objectid = BTRFS_FS_TREE_OBJECTID;
														
 
															 	location.type = BTRFS_ROOT_ITEM_KEY;
														
@@ -3180,6 +3188,10 @@ fail_cleaner:
 
															 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
														
 
															 fail_sysfs:
														
 
															+	if (cleaner_mutex_locked) {
														
 
															+		mutex_unlock(&fs_info->cleaner_mutex);
														
 
															+		cleaner_mutex_locked = false;
														
 
															+	}
														
 
															 	btrfs_sysfs_remove_mounted(fs_info);
														
 
															 fail_fsdev_sysfs:
														
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4620,7 +4620,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 
															 	/* Calc the number of the pages we need flush for space reservation */
														
 
															 	items = calc_reclaim_items_nr(root, to_reclaim);
														
 
															-	to_reclaim = items * EXTENT_SIZE_PER_ITEM;
														
 
															+	to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
														
 
															 	trans = (struct btrfs_trans_handle *)current->journal_info;
														
 
															 	block_rsv = &root->fs_info->delalloc_block_rsv;
														
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3200,14 +3200,10 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 
															 	return ret;
														
 
															 }
														
 
															-static noinline void update_nr_written(struct page *page,
														
 
															-				      struct writeback_control *wbc,
														
 
															-				      unsigned long nr_written)
														
 
															+static void update_nr_written(struct page *page, struct writeback_control *wbc,
														
 
															+			      unsigned long nr_written)
														
 
															 {
														
 
															 	wbc->nr_to_write -= nr_written;
														
 
															-	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
														
 
															-	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
														
 
															-		page->mapping->writeback_index = page->index + nr_written;
														
 
															 }
														
 
															 /*
														
@@ -3920,12 +3916,13 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 
															 	struct inode *inode = mapping->host;
														
 
															 	int ret = 0;
														
 
															 	int done = 0;
														
 
															-	int err = 0;
														
 
															 	int nr_to_write_done = 0;
														
 
															 	struct pagevec pvec;
														
 
															 	int nr_pages;
														
 
															 	pgoff_t index;
														
 
															 	pgoff_t end;		/* Inclusive */
														
 
															+	pgoff_t done_index;
														
 
															+	int range_whole = 0;
														
 
															 	int scanned = 0;
														
 
															 	int tag;
														
@@ -3948,6 +3945,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 
															 	} else {
														
 
															 		index = wbc->range_start >> PAGE_SHIFT;
														
 
															 		end = wbc->range_end >> PAGE_SHIFT;
														
 
															+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
														
 
															+			range_whole = 1;
														
 
															 		scanned = 1;
														
 
															 	}
														
 
															 	if (wbc->sync_mode == WB_SYNC_ALL)
														
@@ -3957,6 +3956,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 
															 retry:
														
 
															 	if (wbc->sync_mode == WB_SYNC_ALL)
														
 
															 		tag_pages_for_writeback(mapping, index, end);
														
 
															+	done_index = index;
														
 
															 	while (!done && !nr_to_write_done && (index <= end) &&
														
 
															 	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
														
 
															 			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
														
@@ -3966,6 +3966,7 @@ retry:
 
															 		for (i = 0; i < nr_pages; i++) {
														
 
															 			struct page *page = pvec.pages[i];
														
 
															+			done_index = page->index;
														
 
															 			/*
														
 
															 			 * At this point we hold neither mapping->tree_lock nor
														
 
															 			 * lock on the page itself: the page may be truncated or
														
@@ -4007,8 +4008,20 @@ retry:
 
															 				unlock_page(page);
														
 
															 				ret = 0;
														
 
															 			}
														
 
															-			if (!err && ret < 0)
														
 
															-				err = ret;
														
 
															+			if (ret < 0) {
														
 
															+				/*
														
 
															+				 * done_index is set past this page,
														
 
															+				 * so media errors will not choke
														
 
															+				 * background writeout for the entire
														
 
															+				 * file. This has consequences for
														
 
															+				 * range_cyclic semantics (i.e. it may
														
 
															+				 * not be suitable for data integrity
														
 
															+				 * writeout).
														
 
															+				 */
														
 
															+				done_index = page->index + 1;
														
 
															+				done = 1;
														
 
															+				break;
														
 
															+			}
														
 
															 			/*
														
 
															 			 * the filesystem may choose to bump up nr_to_write.
														
@@ -4020,7 +4033,7 @@ retry:
 
															 		pagevec_release(&pvec);
														
 
															 		cond_resched();
														
 
															 	}
														
 
															-	if (!scanned && !done && !err) {
														
 
															+	if (!scanned && !done) {
														
 
															 		/*
														
 
															 		 * We hit the last page and there is more work to be done: wrap
														
 
															 		 * back to the start of the file
														
@@ -4029,8 +4042,12 @@ retry:
 
															 		index = 0;
														
 
															 		goto retry;
														
 
															 	}
														
 
															+
														
 
															+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
														
 
															+		mapping->writeback_index = done_index;
														
 
															+
														
 
															 	btrfs_add_delayed_iput(inode);
														
 
															-	return err;
														
 
															+	return ret;
														
 
															 }
														
 
															 static void flush_epd_write_bio(struct extent_page_data *epd)
														
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1696,7 +1696,9 @@ again:
 
															 			btrfs_end_write_no_snapshoting(root);
														
 
															 			btrfs_delalloc_release_metadata(inode, release_bytes);
														
 
															 		} else {
														
 
															-			btrfs_delalloc_release_space(inode, pos, release_bytes);
														
 
															+			btrfs_delalloc_release_space(inode,
														
 
															+						round_down(pos, root->sectorsize),
														
 
															+						release_bytes);
														
 
															 		}
														
 
															 	}
														
@@ -2956,7 +2958,7 @@ const struct file_operations btrfs_file_operations = {
 
															 	.fallocate	= btrfs_fallocate,
														
 
															 	.unlocked_ioctl	= btrfs_ioctl,
														
 
															 #ifdef CONFIG_COMPAT
														
 
															-	.compat_ioctl	= btrfs_ioctl,
														
 
															+	.compat_ioctl	= btrfs_compat_ioctl,
														
 
															 #endif
														
 
															 	.copy_file_range = btrfs_copy_file_range,
														
 
															 	.clone_file_range = btrfs_clone_file_range,
														
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10184,7 +10184,7 @@ static const struct file_operations btrfs_dir_file_operations = {
 
															 	.iterate	= btrfs_real_readdir,
														
 
															 	.unlocked_ioctl	= btrfs_ioctl,
														
 
															 #ifdef CONFIG_COMPAT
														
 
															-	.compat_ioctl	= btrfs_ioctl,
														
 
															+	.compat_ioctl	= btrfs_compat_ioctl,
														
 
															 #endif
														
 
															 	.release        = btrfs_release_file,
														
 
															 	.fsync		= btrfs_sync_file,
														
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir,
 
															 {
														
 
															 	struct btrfs_trans_handle *trans;
														
 
															 	struct btrfs_key key;
														
 
															-	struct btrfs_root_item root_item;
														
 
															+	struct btrfs_root_item *root_item;
														
 
															 	struct btrfs_inode_item *inode_item;
														
 
															 	struct extent_buffer *leaf;
														
 
															 	struct btrfs_root *root = BTRFS_I(dir)->root;
														
@@ -455,16 +455,22 @@ static noinline int create_subvol(struct inode *dir,
 
															 	u64 qgroup_reserved;
														
 
															 	uuid_le new_uuid;
														
 
															+	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
														
 
															+	if (!root_item)
														
 
															+		return -ENOMEM;
														
 
															+
														
 
															 	ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
														
 
															 	if (ret)
														
 
															-		return ret;
														
 
															+		goto fail_free;
														
 
															 	/*
														
 
															 	 * Don't create subvolume whose level is not zero. Or qgroup will be
														
 
															 	 * screwed up since it assume subvolme qgroup's level to be 0.
														
 
															 	 */
														
 
															-	if (btrfs_qgroup_level(objectid))
														
 
															-		return -ENOSPC;
														
 
															+	if (btrfs_qgroup_level(objectid)) {
														
 
															+		ret = -ENOSPC;
														
 
															+		goto fail_free;
														
 
															+	}
														
 
															 	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
														
 
															 	/*
														
@@ -474,14 +480,14 @@ static noinline int create_subvol(struct inode *dir,
 
															 	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
														
 
															 					       8, &qgroup_reserved, false);
														
 
															 	if (ret)
														
 
															-		return ret;
														
 
															+		goto fail_free;
														
 
															 	trans = btrfs_start_transaction(root, 0);
														
 
															 	if (IS_ERR(trans)) {
														
 
															 		ret = PTR_ERR(trans);
														
 
															 		btrfs_subvolume_release_metadata(root, &block_rsv,
														
 
															 						 qgroup_reserved);
														
 
															-		return ret;
														
 
															+		goto fail_free;
														
 
															 	}
														
 
															 	trans->block_rsv = &block_rsv;
														
 
															 	trans->bytes_reserved = block_rsv.size;
														
@@ -509,47 +515,45 @@ static noinline int create_subvol(struct inode *dir,
 
															 			    BTRFS_UUID_SIZE);
														
 
															 	btrfs_mark_buffer_dirty(leaf);
														
 
															-	memset(&root_item, 0, sizeof(root_item));
														
 
															-
														
 
															-	inode_item = &root_item.inode;
														
 
															+	inode_item = &root_item->inode;
														
 
															 	btrfs_set_stack_inode_generation(inode_item, 1);
														
 
															 	btrfs_set_stack_inode_size(inode_item, 3);
														
 
															 	btrfs_set_stack_inode_nlink(inode_item, 1);
														
 
															 	btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
														
 
															 	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
														
 
															-	btrfs_set_root_flags(&root_item, 0);
														
 
															-	btrfs_set_root_limit(&root_item, 0);
														
 
															+	btrfs_set_root_flags(root_item, 0);
														
 
															+	btrfs_set_root_limit(root_item, 0);
														
 
															 	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
														
 
															-	btrfs_set_root_bytenr(&root_item, leaf->start);
														
 
															-	btrfs_set_root_generation(&root_item, trans->transid);
														
 
															-	btrfs_set_root_level(&root_item, 0);
														
 
															-	btrfs_set_root_refs(&root_item, 1);
														
 
															-	btrfs_set_root_used(&root_item, leaf->len);
														
 
															-	btrfs_set_root_last_snapshot(&root_item, 0);
														
 
															+	btrfs_set_root_bytenr(root_item, leaf->start);
														
 
															+	btrfs_set_root_generation(root_item, trans->transid);
														
 
															+	btrfs_set_root_level(root_item, 0);
														
 
															+	btrfs_set_root_refs(root_item, 1);
														
 
															+	btrfs_set_root_used(root_item, leaf->len);
														
 
															+	btrfs_set_root_last_snapshot(root_item, 0);
														
 
															-	btrfs_set_root_generation_v2(&root_item,
														
 
															-			btrfs_root_generation(&root_item));
														
 
															+	btrfs_set_root_generation_v2(root_item,
														
 
															+			btrfs_root_generation(root_item));
														
 
															 	uuid_le_gen(&new_uuid);
														
 
															-	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
														
 
															-	btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
														
 
															-	btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
														
 
															-	root_item.ctime = root_item.otime;
														
 
															-	btrfs_set_root_ctransid(&root_item, trans->transid);
														
 
															-	btrfs_set_root_otransid(&root_item, trans->transid);
														
 
															+	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
														
 
															+	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
														
 
															+	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
														
 
															+	root_item->ctime = root_item->otime;
														
 
															+	btrfs_set_root_ctransid(root_item, trans->transid);
														
 
															+	btrfs_set_root_otransid(root_item, trans->transid);
														
 
															 	btrfs_tree_unlock(leaf);
														
 
															 	free_extent_buffer(leaf);
														
 
															 	leaf = NULL;
														
 
															-	btrfs_set_root_dirid(&root_item, new_dirid);
														
 
															+	btrfs_set_root_dirid(root_item, new_dirid);
														
 
															 	key.objectid = objectid;
														
 
															 	key.offset = 0;
														
 
															 	key.type = BTRFS_ROOT_ITEM_KEY;
														
 
															 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
														
 
															-				&root_item);
														
 
															+				root_item);
														
 
															 	if (ret)
														
 
															 		goto fail;
														
@@ -601,12 +605,13 @@ static noinline int create_subvol(struct inode *dir,
 
															 	BUG_ON(ret);
														
 
															 	ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
														
 
															-				  root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
														
 
															+				  root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
														
 
															 				  objectid);
														
 
															 	if (ret)
														
 
															 		btrfs_abort_transaction(trans, root, ret);
														
 
															 fail:
														
 
															+	kfree(root_item);
														
 
															 	trans->block_rsv = NULL;
														
 
															 	trans->bytes_reserved = 0;
														
 
															 	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
														
@@ -629,6 +634,10 @@ fail:
 
															 		d_instantiate(dentry, inode);
														
 
															 	}
														
 
															 	return ret;
														
 
															+
														
 
															+fail_free:
														
 
															+	kfree(root_item);
														
 
															+	return ret;
														
 
															 }
														
 
															 static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
														
@@ -2680,32 +2689,31 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 
															 	if (ret)
														
 
															 		return ret;
														
 
															-	vol_args = memdup_user(arg, sizeof(*vol_args));
														
 
															-	if (IS_ERR(vol_args)) {
														
 
															-		ret = PTR_ERR(vol_args);
														
 
															-		goto err_drop;
														
 
															-	}
														
 
															-
														
 
															-	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
														
 
															-
														
 
															 	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
														
 
															 			1)) {
														
 
															 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
														
 
															+		goto out_drop_write;
														
 
															+	}
														
 
															+
														
 
															+	vol_args = memdup_user(arg, sizeof(*vol_args));
														
 
															+	if (IS_ERR(vol_args)) {
														
 
															+		ret = PTR_ERR(vol_args);
														
 
															 		goto out;
														
 
															 	}
														
 
															+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
														
 
															 	mutex_lock(&root->fs_info->volume_mutex);
														
 
															 	ret = btrfs_rm_device(root, vol_args->name);
														
 
															 	mutex_unlock(&root->fs_info->volume_mutex);
														
 
															-	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
														
 
															 	if (!ret)
														
 
															 		btrfs_info(root->fs_info, "disk deleted %s",vol_args->name);
														
 
															-
														
 
															-out:
														
 
															 	kfree(vol_args);
														
 
															-err_drop:
														
 
															+out:
														
 
															+	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
														
 
															+out_drop_write:
														
 
															 	mnt_drop_write_file(file);
														
 
															+
														
 
															 	return ret;
														
 
															 }
														
@@ -3468,13 +3476,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 
															 	u64 last_dest_end = destoff;
														
 
															 	ret = -ENOMEM;
														
 
															-	buf = vmalloc(root->nodesize);
														
 
															-	if (!buf)
														
 
															-		return ret;
														
 
															+	buf = kmalloc(root->nodesize, GFP_KERNEL | __GFP_NOWARN);
														
 
															+	if (!buf) {
														
 
															+		buf = vmalloc(root->nodesize);
														
 
															+		if (!buf)
														
 
															+			return ret;
														
 
															+	}
														
 
															 	path = btrfs_alloc_path();
														
 
															 	if (!path) {
														
 
															-		vfree(buf);
														
 
															+		kvfree(buf);
														
 
															 		return ret;
														
 
															 	}
														
@@ -3775,7 +3786,7 @@ process_slot:
 
															 out:
														
 
															 	btrfs_free_path(path);
														
 
															-	vfree(buf);
														
 
															+	kvfree(buf);
														
 
															 	return ret;
														
 
															 }
														
@@ -5394,9 +5405,15 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
 
															 	if (ret)
														
 
															 		return ret;
														
 
															+	ret = mnt_want_write_file(file);
														
 
															+	if (ret)
														
 
															+		return ret;
														
 
															+
														
 
															 	trans = btrfs_start_transaction(root, 0);
														
 
															-	if (IS_ERR(trans))
														
 
															-		return PTR_ERR(trans);
														
 
															+	if (IS_ERR(trans)) {
														
 
															+		ret = PTR_ERR(trans);
														
 
															+		goto out_drop_write;
														
 
															+	}
														
 
															 	spin_lock(&root->fs_info->super_lock);
														
 
															 	newflags = btrfs_super_compat_flags(super_block);
														
@@ -5415,7 +5432,11 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
 
															 	btrfs_set_super_incompat_flags(super_block, newflags);
														
 
															 	spin_unlock(&root->fs_info->super_lock);
														
 
															-	return btrfs_commit_transaction(trans, root);
														
 
															+	ret = btrfs_commit_transaction(trans, root);
														
 
															+out_drop_write:
														
 
															+	mnt_drop_write_file(file);
														
 
															+
														
 
															+	return ret;
														
 
															 }
														
 
															 long btrfs_ioctl(struct file *file, unsigned int
														
@@ -5552,3 +5573,24 @@ long btrfs_ioctl(struct file *file, unsigned int
 
															 	return -ENOTTY;
														
 
															 }
														
 
															+
														
 
															+#ifdef CONFIG_COMPAT
														
 
															+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
														
 
															+{
														
 
															+	switch (cmd) {
														
 
															+	case FS_IOC32_GETFLAGS:
														
 
															+		cmd = FS_IOC_GETFLAGS;
														
 
															+		break;
														
 
															+	case FS_IOC32_SETFLAGS:
														
 
															+		cmd = FS_IOC_SETFLAGS;
														
 
															+		break;
														
 
															+	case FS_IOC32_GETVERSION:
														
 
															+		cmd = FS_IOC_GETVERSION;
														
 
															+		break;
														
 
															+	default:
														
 
															+		return -ENOIOCTLCMD;
														
 
															+	}
														
 
															+
														
 
															+	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
														
 
															+}
														
 
															+#endif
														
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1350,7 +1350,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 
															 		recover->bbio = bbio;
														
 
															 		recover->map_length = mapped_length;
														
 
															-		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
														
 
															+		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
														
 
															 		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
														
@@ -2127,6 +2127,8 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
 
															 	if (bio->bi_error)
														
 
															 		sblock->no_io_error_seen = 0;
														
 
															+	bio_put(bio);
														
 
															+
														
 
															 	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
														
 
															 }
														
@@ -2860,7 +2862,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 
															 	int extent_mirror_num;
														
 
															 	int stop_loop = 0;
														
 
															-	nsectors = map->stripe_len / root->sectorsize;
														
 
															+	nsectors = div_u64(map->stripe_len, root->sectorsize);
														
 
															 	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
														
 
															 	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
														
 
															 			  GFP_NOFS);
														
@@ -3070,7 +3072,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
															 	int slot;
														
 
															 	u64 nstripes;
														
 
															 	struct extent_buffer *l;
														
 
															-	struct btrfs_key key;
														
 
															 	u64 physical;
														
 
															 	u64 logical;
														
 
															 	u64 logic_end;
														
@@ -3079,7 +3080,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
															 	int mirror_num;
														
 
															 	struct reada_control *reada1;
														
 
															 	struct reada_control *reada2;
														
 
															-	struct btrfs_key key_start;
														
 
															+	struct btrfs_key key;
														
 
															 	struct btrfs_key key_end;
														
 
															 	u64 increment = map->stripe_len;
														
 
															 	u64 offset;
														
@@ -3158,21 +3159,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
															 	scrub_blocked_if_needed(fs_info);
														
 
															 	/* FIXME it might be better to start readahead at commit root */
														
 
															-	key_start.objectid = logical;
														
 
															-	key_start.type = BTRFS_EXTENT_ITEM_KEY;
														
 
															-	key_start.offset = (u64)0;
														
 
															+	key.objectid = logical;
														
 
															+	key.type = BTRFS_EXTENT_ITEM_KEY;
														
 
															+	key.offset = (u64)0;
														
 
															 	key_end.objectid = logic_end;
														
 
															 	key_end.type = BTRFS_METADATA_ITEM_KEY;
														
 
															 	key_end.offset = (u64)-1;
														
 
															-	reada1 = btrfs_reada_add(root, &key_start, &key_end);
														
 
															+	reada1 = btrfs_reada_add(root, &key, &key_end);
														
 
															-	key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
														
 
															-	key_start.type = BTRFS_EXTENT_CSUM_KEY;
														
 
															-	key_start.offset = logical;
														
 
															+	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
														
 
															+	key.type = BTRFS_EXTENT_CSUM_KEY;
														
 
															+	key.offset = logical;
														
 
															 	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
														
 
															 	key_end.type = BTRFS_EXTENT_CSUM_KEY;
														
 
															 	key_end.offset = logic_end;
														
 
															-	reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
														
 
															+	reada2 = btrfs_reada_add(csum_root, &key, &key_end);
														
 
															 	if (!IS_ERR(reada1))
														
 
															 		btrfs_reada_wait(reada1);
														
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5939,6 +5939,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 
															 	u32 i;
														
 
															 	u64 *clone_sources_tmp = NULL;
														
 
															 	int clone_sources_to_rollback = 0;
														
 
															+	unsigned alloc_size;
														
 
															 	int sort_clone_roots = 0;
														
 
															 	int index;
														
@@ -5978,6 +5979,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 
															 		goto out;
														
 
															 	}
														
 
															+	if (arg->clone_sources_count >
														
 
															+	    ULLONG_MAX / sizeof(*arg->clone_sources)) {
														
 
															+		ret = -EINVAL;
														
 
															+		goto out;
														
 
															+	}
														
 
															+
														
 
															 	if (!access_ok(VERIFY_READ, arg->clone_sources,
														
 
															 			sizeof(*arg->clone_sources) *
														
 
															 			arg->clone_sources_count)) {
														
@@ -6022,40 +6029,53 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 
															 	sctx->clone_roots_cnt = arg->clone_sources_count;
														
 
															 	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
														
 
															-	sctx->send_buf = vmalloc(sctx->send_max_size);
														
 
															+	sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN);
														
 
															 	if (!sctx->send_buf) {
														
 
															-		ret = -ENOMEM;
														
 
															-		goto out;
														
 
															+		sctx->send_buf = vmalloc(sctx->send_max_size);
														
 
															+		if (!sctx->send_buf) {
														
 
															+			ret = -ENOMEM;
														
 
															+			goto out;
														
 
															+		}
														
 
															 	}
														
 
															-	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
														
 
															+	sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN);
														
 
															 	if (!sctx->read_buf) {
														
 
															-		ret = -ENOMEM;
														
 
															-		goto out;
														
 
															+		sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
														
 
															+		if (!sctx->read_buf) {
														
 
															+			ret = -ENOMEM;
														
 
															+			goto out;
														
 
															+		}
														
 
															 	}
														
 
															 	sctx->pending_dir_moves = RB_ROOT;
														
 
															 	sctx->waiting_dir_moves = RB_ROOT;
														
 
															 	sctx->orphan_dirs = RB_ROOT;
														
 
															-	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
														
 
															-			(arg->clone_sources_count + 1));
														
 
															+	alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
														
 
															+
														
 
															+	sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
														
 
															 	if (!sctx->clone_roots) {
														
 
															-		ret = -ENOMEM;
														
 
															-		goto out;
														
 
															+		sctx->clone_roots = vzalloc(alloc_size);
														
 
															+		if (!sctx->clone_roots) {
														
 
															+			ret = -ENOMEM;
														
 
															+			goto out;
														
 
															+		}
														
 
															 	}
														
 
															+	alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
														
 
															+
														
 
															 	if (arg->clone_sources_count) {
														
 
															-		clone_sources_tmp = vmalloc(arg->clone_sources_count *
														
 
															-				sizeof(*arg->clone_sources));
														
 
															+		clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
														
 
															 		if (!clone_sources_tmp) {
														
 
															-			ret = -ENOMEM;
														
 
															-			goto out;
														
 
															+			clone_sources_tmp = vmalloc(alloc_size);
														
 
															+			if (!clone_sources_tmp) {
														
 
															+				ret = -ENOMEM;
														
 
															+				goto out;
														
 
															+			}
														
 
															 		}
														
 
															 		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
														
 
															-				arg->clone_sources_count *
														
 
															-				sizeof(*arg->clone_sources));
														
 
															+				alloc_size);
														
 
															 		if (ret) {
														
 
															 			ret = -EFAULT;
														
 
															 			goto out;
														
@@ -6089,7 +6109,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 
															 			sctx->clone_roots[i].root = clone_root;
														
 
															 			clone_sources_to_rollback = i + 1;
														
 
															 		}
														
 
															-		vfree(clone_sources_tmp);
														
 
															+		kvfree(clone_sources_tmp);
														
 
															 		clone_sources_tmp = NULL;
														
 
															 	}
														
@@ -6207,15 +6227,15 @@ out:
 
															 		btrfs_root_dec_send_in_progress(sctx->parent_root);
														
 
															 	kfree(arg);
														
 
															-	vfree(clone_sources_tmp);
														
 
															+	kvfree(clone_sources_tmp);
														
 
															 	if (sctx) {
														
 
															 		if (sctx->send_filp)
														
 
															 			fput(sctx->send_filp);
														
 
															-		vfree(sctx->clone_roots);
														
 
															-		vfree(sctx->send_buf);
														
 
															-		vfree(sctx->read_buf);
														
 
															+		kvfree(sctx->clone_roots);
														
 
															+		kvfree(sctx->send_buf);
														
 
															+		kvfree(sctx->read_buf);
														
 
															 		name_cache_free(sctx);
														
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2051,6 +2051,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
															 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
														
 
															 	int ret;
														
 
															 	u64 thresh = 0;
														
 
															+	int mixed = 0;
														
 
															 	/*
														
 
															 	 * holding chunk_muext to avoid allocating new chunks, holding
														
@@ -2076,8 +2077,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
															 				}
														
 
															 			}
														
 
															 		}
														
 
															-		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
														
 
															-			total_free_meta += found->disk_total - found->disk_used;
														
 
															+
														
 
															+		/*
														
 
															+		 * Metadata in mixed block group profiles are accounted in data
														
 
															+		 */
														
 
															+		if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
														
 
															+			if (found->flags & BTRFS_BLOCK_GROUP_DATA)
														
 
															+				mixed = 1;
														
 
															+			else
														
 
															+				total_free_meta += found->disk_total -
														
 
															+					found->disk_used;
														
 
															+		}
														
 
															 		total_used += found->disk_used;
														
 
															 	}
														
@@ -2090,7 +2100,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
															 	/* Account global block reserve as used, it's in logical size already */
														
 
															 	spin_lock(&block_rsv->lock);
														
 
															-	buf->f_bfree -= block_rsv->size >> bits;
														
 
															+	/* Mixed block groups accounting is not byte-accurate, avoid overflow */
														
 
															+	if (buf->f_bfree >= block_rsv->size >> bits)
														
 
															+		buf->f_bfree -= block_rsv->size >> bits;
														
 
															+	else
														
 
															+		buf->f_bfree = 0;
														
 
															 	spin_unlock(&block_rsv->lock);
														
 
															 	buf->f_bavail = div_u64(total_free_data, factor);
														
@@ -2115,7 +2129,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
															 	 */
														
 
															 	thresh = 4 * 1024 * 1024;
														
 
															-	if (total_free_meta - thresh < block_rsv->size)
														
 
															+	if (!mixed && total_free_meta - thresh < block_rsv->size)
														
 
															 		buf->f_bavail = 0;
														
 
															 	buf->f_type = BTRFS_SUPER_MAGIC;
														
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -120,6 +120,9 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
 
															 	if (!fs_info)
														
 
															 		return -EPERM;
														
 
															+	if (fs_info->sb->s_flags & MS_RDONLY)
														
 
															+		return -EROFS;
														
 
															+
														
 
															 	ret = kstrtoul(skip_spaces(buf), 0, &val);
														
 
															 	if (ret)
														
 
															 		return ret;
														
@@ -364,7 +367,13 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
 
															 {
														
 
															 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
														
 
															 	char *label = fs_info->super_copy->label;
														
 
															-	return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
														
 
															+	ssize_t ret;
														
 
															+
														
 
															+	spin_lock(&fs_info->super_lock);
														
 
															+	ret = snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
														
 
															+	spin_unlock(&fs_info->super_lock);
														
 
															+
														
 
															+	return ret;
														
 
															 }
														
 
															 static ssize_t btrfs_label_store(struct kobject *kobj,
														
@@ -374,6 +383,9 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
 
															 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
														
 
															 	size_t p_len;
														
 
															+	if (!fs_info)
														
 
															+		return -EPERM;
														
 
															+
														
 
															 	if (fs_info->sb->s_flags & MS_RDONLY)
														
 
															 		return -EROFS;
														
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -311,10 +311,11 @@ loop:
 
															  * when the transaction commits
														
 
															  */
														
 
															 static int record_root_in_trans(struct btrfs_trans_handle *trans,
														
 
															-			       struct btrfs_root *root)
														
 
															+			       struct btrfs_root *root,
														
 
															+			       int force)
														
 
															 {
														
 
															-	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
														
 
															-	    root->last_trans < trans->transid) {
														
 
															+	if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
														
 
															+	    root->last_trans < trans->transid) || force) {
														
 
															 		WARN_ON(root == root->fs_info->extent_root);
														
 
															 		WARN_ON(root->commit_root != root->node);
														
@@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
 
															 		smp_wmb();
														
 
															 		spin_lock(&root->fs_info->fs_roots_radix_lock);
														
 
															-		if (root->last_trans == trans->transid) {
														
 
															+		if (root->last_trans == trans->transid && !force) {
														
 
															 			spin_unlock(&root->fs_info->fs_roots_radix_lock);
														
 
															 			return 0;
														
 
															 		}
														
@@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
 
															 		return 0;
														
 
															 	mutex_lock(&root->fs_info->reloc_mutex);
														
 
															-	record_root_in_trans(trans, root);
														
 
															+	record_root_in_trans(trans, root, 0);
														
 
															 	mutex_unlock(&root->fs_info->reloc_mutex);
														
 
															 	return 0;
														
@@ -1310,6 +1311,97 @@ int btrfs_defrag_root(struct btrfs_root *root)
 
															 	return ret;
														
 
															 }
														
 
															+/* Bisectability fixup, remove in 4.8 */
														
 
															+#ifndef btrfs_std_error
														
 
															+#define btrfs_std_error btrfs_handle_fs_error
														
 
															+#endif
														
 
															+
														
 
															+/*
														
 
															+ * Do all special snapshot related qgroup dirty hack.
														
 
															+ *
														
 
															+ * Will do all needed qgroup inherit and dirty hack like switch commit
														
 
															+ * roots inside one transaction and write all btree into disk, to make
														
 
															+ * qgroup work.
														
 
															+ */
														
 
															+static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
														
 
															+				   struct btrfs_root *src,
														
 
															+				   struct btrfs_root *parent,
														
 
															+				   struct btrfs_qgroup_inherit *inherit,
														
 
															+				   u64 dst_objectid)
														
 
															+{
														
 
															+	struct btrfs_fs_info *fs_info = src->fs_info;
														
 
															+	int ret;
														
 
															+
														
 
															+	/*
														
 
															+	 * Save some performance in the case that qgroups are not
														
 
															+	 * enabled. If this check races with the ioctl, rescan will
														
 
															+	 * kick in anyway.
														
 
															+	 */
														
 
															+	mutex_lock(&fs_info->qgroup_ioctl_lock);
														
 
															+	if (!fs_info->quota_enabled) {
														
 
															+		mutex_unlock(&fs_info->qgroup_ioctl_lock);
														
 
															+		return 0;
														
 
															+	}
														
 
															+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
														
 
															+
														
 
															+	/*
														
 
															+	 * We are going to commit transaction, see btrfs_commit_transaction()
														
 
															+	 * comment for reason locking tree_log_mutex
														
 
															+	 */
														
 
															+	mutex_lock(&fs_info->tree_log_mutex);
														
 
															+
														
 
															+	ret = commit_fs_roots(trans, src);
														
 
															+	if (ret)
														
 
															+		goto out;
														
 
															+	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
														
 
															+	if (ret < 0)
														
 
															+		goto out;
														
 
															+	ret = btrfs_qgroup_account_extents(trans, fs_info);
														
 
															+	if (ret < 0)
														
 
															+		goto out;
														
 
															+
														
 
															+	/* Now qgroup are all updated, we can inherit it to new qgroups */
														
 
															+	ret = btrfs_qgroup_inherit(trans, fs_info,
														
 
															+				   src->root_key.objectid, dst_objectid,
														
 
															+				   inherit);
														
 
															+	if (ret < 0)
														
 
															+		goto out;
														
 
															+
														
 
															+	/*
														
 
															+	 * Now we do a simplified commit transaction, which will:
														
 
															+	 * 1) commit all subvolume and extent tree
														
 
															+	 *    To ensure all subvolume and extent tree have a valid
														
 
															+	 *    commit_root for accounting the later insert_dir_item()
														
 
															+	 * 2) write all btree blocks onto disk
														
 
															+	 *    This is to make sure later btree modification will be cowed
														
 
															+	 *    Or commit_root can be populated and cause wrong qgroup numbers
														
 
															+	 * In this simplified commit, we don't really care about other trees
														
 
															+	 * like chunk and root tree, as they won't affect qgroup.
														
 
															+	 * And we don't write super to avoid half committed status.
														
 
															+	 */
														
 
															+	ret = commit_cowonly_roots(trans, src);
														
 
															+	if (ret)
														
 
															+		goto out;
														
 
															+	switch_commit_roots(trans->transaction, fs_info);
														
 
															+	ret = btrfs_write_and_wait_transaction(trans, src);
														
 
															+	if (ret)
														
 
															+		btrfs_std_error(fs_info, ret,
														
 
															+			"Error while writing out transaction for qgroup");
														
 
															+
														
 
															+out:
														
 
															+	mutex_unlock(&fs_info->tree_log_mutex);
														
 
															+
														
 
															+	/*
														
 
															+	 * Force parent root to be updated, as we recorded it before so its
														
 
															+	 * last_trans == cur_transid.
														
 
															+	 * Or it won't be committed again onto disk after later
														
 
															+	 * insert_dir_item()
														
 
															+	 */
														
 
															+	if (!ret)
														
 
															+		record_root_in_trans(trans, parent, 1);
														
 
															+	return ret;
														
 
															+}
														
 
															+
														
 
															 /*
														
 
															  * new snapshots need to be created at a very specific time in the
														
 
															  * transaction commit.  This does the actual creation.
														
@@ -1383,7 +1475,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
															 	dentry = pending->dentry;
														
 
															 	parent_inode = pending->dir;
														
 
															 	parent_root = BTRFS_I(parent_inode)->root;
														
 
															-	record_root_in_trans(trans, parent_root);
														
 
															+	record_root_in_trans(trans, parent_root, 0);
														
 
															 	cur_time = current_fs_time(parent_inode->i_sb);
														
@@ -1420,7 +1512,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
															 		goto fail;
														
 
															 	}
														
 
															-	record_root_in_trans(trans, root);
														
 
															+	record_root_in_trans(trans, root, 0);
														
 
															 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
														
 
															 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
														
 
															 	btrfs_check_and_init_root_item(new_root_item);
														
@@ -1516,6 +1608,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
															 		goto fail;
														
 
															 	}
														
 
															+	/*
														
 
															+	 * Do special qgroup accounting for snapshot, as we do some qgroup
														
 
															+	 * snapshot hack to do fast snapshot.
														
 
															+	 * To co-operate with that hack, we do hack again.
														
 
															+	 * Or snapshot will be greatly slowed down by a subtree qgroup rescan
														
 
															+	 */
														
 
															+	ret = qgroup_account_snapshot(trans, root, parent_root,
														
 
															+				      pending->inherit, objectid);
														
 
															+	if (ret < 0)
														
 
															+		goto fail;
														
 
															+
														
 
															 	ret = btrfs_insert_dir_item(trans, parent_root,
														
 
															 				    dentry->d_name.name, dentry->d_name.len,
														
 
															 				    parent_inode, &key,
														
@@ -1559,23 +1662,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
															 		goto fail;
														
 
															 	}
														
 
															-	/*
														
 
															-	 * account qgroup counters before qgroup_inherit()
														
 
															-	 */
														
 
															-	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
														
 
															-	if (ret)
														
 
															-		goto fail;
														
 
															-	ret = btrfs_qgroup_account_extents(trans, fs_info);
														
 
															-	if (ret)
														
 
															-		goto fail;
														
 
															-	ret = btrfs_qgroup_inherit(trans, fs_info,
														
 
															-				   root->root_key.objectid,
														
 
															-				   objectid, pending->inherit);
														
 
															-	if (ret) {
														
 
															-		btrfs_abort_transaction(trans, root, ret);
														
 
															-		goto fail;
														
 
															-	}
														
 
															-
														
 
															 fail:
														
 
															 	pending->error = ret;
														
 
															 dir_item_existed:
														
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1972,11 +1972,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
 
															 	if (srcdev->missing)
														
 
															 		fs_devices->missing_devices--;
														
 
															-	if (srcdev->writeable) {
														
 
															+	if (srcdev->writeable)
														
 
															 		fs_devices->rw_devices--;
														
 
															-		/* zero out the old super if it is writable */
														
 
															-		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
														
 
															-	}
														
 
															 	if (srcdev->bdev)
														
 
															 		fs_devices->open_devices--;
														
@@ -1987,6 +1984,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 
															 {
														
 
															 	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
														
 
															+	if (srcdev->writeable) {
														
 
															+		/* zero out the old super if it is writable */
														
 
															+		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
														
 
															+	}
														
 
															 	call_rcu(&srcdev->rcu, free_device);
														
 
															 	/*
														
@@ -2024,10 +2025,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
															 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
														
 
															-	if (tgtdev->bdev) {
														
 
															-		btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
														
 
															+	if (tgtdev->bdev)
														
 
															 		fs_info->fs_devices->open_devices--;
														
 
															-	}
														
 
															+
														
 
															 	fs_info->fs_devices->num_devices--;
														
 
															 	next_device = list_entry(fs_info->fs_devices->devices.next,
														
@@ -2038,10 +2038,18 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
															 		fs_info->fs_devices->latest_bdev = next_device->bdev;
														
 
															 	list_del_rcu(&tgtdev->dev_list);
														
 
															-	call_rcu(&tgtdev->rcu, free_device);
														
 
															-
														
 
															 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
														
 
															 	mutex_unlock(&uuid_mutex);
														
 
															+
														
 
															+	/*
														
 
															+	 * The update_dev_time() within btrfs_scratch_superblocks()
														
 
															+	 * may lead to a call to btrfs_show_devname() which will try
														
 
															+	 * to hold device_list_mutex. And here this device
														
 
															+	 * is already out of device list, so we don't have to hold
														
 
															+	 * the device_list_mutex lock.
														
 
															+	 */
														
 
															+	btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
														
 
															+	call_rcu(&tgtdev->rcu, free_device);
														
 
															 }
														
 
															 static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
														
@@ -3402,6 +3410,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 
															 	u32 count_meta = 0;
														
 
															 	u32 count_sys = 0;
														
 
															 	int chunk_reserved = 0;
														
 
															+	u64 bytes_used = 0;
														
 
															 	/* step one make some room on all the devices */
														
 
															 	devices = &fs_info->fs_devices->devices;
														
@@ -3540,7 +3549,13 @@ again:
 
															 			goto loop;
														
 
															 		}
														
 
															-		if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
														
 
															+		ASSERT(fs_info->data_sinfo);
														
 
															+		spin_lock(&fs_info->data_sinfo->lock);
														
 
															+		bytes_used = fs_info->data_sinfo->bytes_used;
														
 
															+		spin_unlock(&fs_info->data_sinfo->lock);
														
 
															+
														
 
															+		if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
														
 
															+		    !chunk_reserved && !bytes_used) {
														
 
															 			trans = btrfs_start_transaction(chunk_root, 0);
														
 
															 			if (IS_ERR(trans)) {
														
 
															 				mutex_unlock(&fs_info->delete_unused_bgs_mutex);
														
@@ -3693,10 +3708,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 
															 		num_devices--;
														
 
															 	}
														
 
															 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
														
 
															-	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
														
 
															-	if (num_devices == 1)
														
 
															-		allowed |= BTRFS_BLOCK_GROUP_DUP;
														
 
															-	else if (num_devices > 1)
														
 
															+	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
														
 
															+	if (num_devices > 1)
														
 
															 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
														
 
															 	if (num_devices > 2)
														
 
															 		allowed |= BTRFS_BLOCK_GROUP_RAID5;
														
@@ -5278,7 +5291,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 
															 	stripe_nr = div64_u64(stripe_nr, stripe_len);
														
 
															 	stripe_offset = stripe_nr * stripe_len;
														
 
															-	BUG_ON(offset < stripe_offset);
														
 
															+	if (offset < stripe_offset) {
														
 
															+		btrfs_crit(fs_info, "stripe math has gone wrong, "
														
 
															+			   "stripe_offset=%llu, offset=%llu, start=%llu, "
														
 
															+			   "logical=%llu, stripe_len=%llu",
														
 
															+			   stripe_offset, offset, em->start, logical,
														
 
															+			   stripe_len);
														
 
															+		free_extent_map(em);
														
 
															+		return -EINVAL;
														
 
															+	}
														
 
															 	/* stripe_offset is the offset of this block in its stripe*/
														
 
															 	stripe_offset = offset - stripe_offset;
														
@@ -5519,7 +5540,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 
															 				&stripe_index);
														
 
															 		mirror_num = stripe_index + 1;
														
 
															 	}
														
 
															-	BUG_ON(stripe_index >= map->num_stripes);
														
 
															+	if (stripe_index >= map->num_stripes) {
														
 
															+		btrfs_crit(fs_info, "stripe index math went horribly wrong, "
														
 
															+			   "got stripe_index=%u, num_stripes=%u",
														
 
															+			   stripe_index, map->num_stripes);
														
 
															+		ret = -EINVAL;
														
 
															+		goto out;
														
 
															+	}
														
 
															 	num_alloc_stripes = num_stripes;
														
 
															 	if (dev_replace_is_ongoing) {
														
@@ -6242,7 +6269,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 
															 			"invalid chunk length %llu", length);
														
 
															 		return -EIO;
														
 
															 	}
														
 
															-	if (!is_power_of_2(stripe_len)) {
														
 
															+	if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
														
 
															 		btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
														
 
															 			  stripe_len);
														
 
															 		return -EIO;
														
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -347,7 +347,7 @@ struct map_lookup {
 
															 	u64 type;
														
 
															 	int io_align;
														
 
															 	int io_width;
														
 
															-	int stripe_len;
														
 
															+	u64 stripe_len;
														
 
															 	int sector_size;
														
 
															 	int num_stripes;
														
 
															 	int sub_stripes;