
Merge branch 'xfs-misc-fixes-for-4.3-2' into for-next

Dave Chinner, 10 years ago
commit aa493382cb

+ 2 - 0
fs/xfs/libxfs/xfs_attr.c

@@ -139,6 +139,8 @@ xfs_attr_get(
 
 	args.value = value;
 	args.valuelen = *valuelenp;
+	/* Entirely possible to look up a name which doesn't exist */
+	args.op_flags = XFS_DA_OP_OKNOENT;
 
 	lock_mode = xfs_ilock_attr_map_shared(ip);
 	if (!xfs_inode_hasattr(ip))

+ 14 - 9
fs/xfs/libxfs/xfs_da_btree.c

@@ -1822,6 +1822,7 @@ xfs_da3_path_shift(
 	struct xfs_da_args	*args;
 	struct xfs_da_node_entry *btree;
 	struct xfs_da3_icnode_hdr nodehdr;
+	struct xfs_buf		*bp;
 	xfs_dablk_t		blkno = 0;
 	int			level;
 	int			error;
@@ -1866,20 +1867,24 @@ xfs_da3_path_shift(
 	 */
 	for (blk++, level++; level < path->active; blk++, level++) {
 		/*
-		 * Release the old block.
-		 * (if it's dirty, trans won't actually let go)
+		 * Read the next child block into a local buffer.
 		 */
-		if (release)
-			xfs_trans_brelse(args->trans, blk->bp);
+		error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
+					  args->whichfork);
+		if (error)
+			return error;
 
 		/*
-		 * Read the next child block.
+		 * Release the old block (if it's dirty, the trans doesn't
+		 * actually let go) and swap the local buffer into the path
+		 * structure. This ensures failure of the above read doesn't set
+		 * a NULL buffer in an active slot in the path.
 		 */
+		if (release)
+			xfs_trans_brelse(args->trans, blk->bp);
 		blk->blkno = blkno;
-		error = xfs_da3_node_read(args->trans, dp, blkno, -1,
-					&blk->bp, args->whichfork);
-		if (error)
-			return error;
+		blk->bp = bp;
+
 		info = blk->bp->b_addr;
 		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
 		       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
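
The comment added in this hunk carries the actual fix: read the replacement child block into a local buffer before releasing the old one, so a failed read cannot leave a NULL buffer in an active path slot. Below is a minimal standalone sketch of that ordering, using hypothetical types and helpers rather than the XFS ones.

#include <stdlib.h>

/* Hypothetical stand-ins for xfs_da3_node_read() and xfs_trans_brelse(). */
struct buf {
	long	blkno;
};

static int read_block(long blkno, struct buf **bpp)
{
	struct buf *bp = malloc(sizeof(*bp));

	if (!bp)
		return -1;
	bp->blkno = blkno;
	*bpp = bp;
	return 0;
}

static void release_block(struct buf *bp)
{
	free(bp);
}

struct path_slot {
	long		blkno;
	struct buf	*bp;
};

/*
 * Read the replacement block into a local variable first; only once that
 * has succeeded is the old buffer released and the slot updated, so a
 * failed read leaves the slot untouched rather than holding NULL.
 */
static int shift_slot(struct path_slot *slot, long new_blkno)
{
	struct buf	*bp;
	int		error;

	error = read_block(new_blkno, &bp);
	if (error)
		return error;

	release_block(slot->bp);
	slot->blkno = new_blkno;
	slot->bp = bp;
	return 0;
}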

+ 9 - 2
fs/xfs/libxfs/xfs_da_format.h

@@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote {
 typedef struct xfs_attr_leafblock {
 	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
 	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */
-	xfs_attr_leaf_name_local_t namelist;	/* grows from bottom of buf */
-	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */
+	/*
+	 * The rest of the block contains the following structures after the
+	 * leaf entries, growing from the bottom up. The variables are never
+	 * referenced and defining them can actually make gcc optimize away
+	 * accesses to the 'entries' array above index 0 so don't do that.
+	 *
+	 * xfs_attr_leaf_name_local_t namelist;
+	 * xfs_attr_leaf_name_remote_t valuelist;
+	 */
 } xfs_attr_leafblock_t;
 
 /*
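
With the namelist/valuelist members gone, name data in an attr leaf block is reached purely by the byte offset recorded in each entry. A rough sketch of that lookup follows, assuming the XFS on-disk definitions (xfs_attr_leafblock_t, be16_to_cpu) are in scope; the helper name is illustrative only, not necessarily the one the kernel uses.

/*
 * Illustrative helper: find the name data for entry 'idx' via the nameidx
 * byte offset kept in entries[], instead of through the removed trailing
 * struct members.
 */
static inline char *
attr3_leaf_name_example(xfs_attr_leafblock_t *leafp, int idx)
{
	struct xfs_attr_leaf_entry *entries = &leafp->entries[0];

	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
}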

+ 3 - 0
fs/xfs/libxfs/xfs_dir2.c

@@ -362,6 +362,7 @@ xfs_dir_lookup(
 	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
+	int		lock_mode;
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_lookup);
@@ -387,6 +388,7 @@ xfs_dir_lookup(
 	if (ci_name)
 		args->op_flags |= XFS_DA_OP_CILOOKUP;
 
+	lock_mode = xfs_ilock_data_map_shared(dp);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_lookup(args);
 		goto out_check_rval;
@@ -419,6 +421,7 @@ out_check_rval:
 		}
 	}
 out_free:
+	xfs_iunlock(dp, lock_mode);
 	kmem_free(args);
 	return rval;
 }

+ 2 - 1
fs/xfs/libxfs/xfs_dir2_data.c

@@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify(
 		return;
 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
 	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-		xfs_dir3_data_verify(bp);
+		bp->b_ops = &xfs_dir3_data_buf_ops;
+		bp->b_ops->verify_read(bp);
 		return;
 	default:
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);

+ 0 - 3
fs/xfs/libxfs/xfs_sb.c

@@ -186,9 +186,6 @@ xfs_mount_validate_sb(
 	if (xfs_sb_version_hassparseinodes(sbp)) {
 		uint32_t	align;
 
-		xfs_alert(mp,
-	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
-
 		align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
 				>> sbp->sb_blocklog;
 		if (sbp->sb_inoalignmt != align) {

+ 8 - 3
fs/xfs/xfs_dir2_readdir.c

@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
 	int			wantoff;	/* starting block offset */
 	xfs_off_t		cook;
 	struct xfs_da_geometry	*geo = args->geo;
+	int			lock_mode;
 
 	/*
 	 * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
 	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
+	lock_mode = xfs_ilock_data_map_shared(dp);
 	error = xfs_dir3_block_read(NULL, dp, &bp);
+	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
 
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
 		 * current buffer, need to get another one.
 		 */
 		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
+			int	lock_mode;
 
+			lock_mode = xfs_ilock_data_map_shared(dp);
 			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
 						      &curoff, &bp);
+			xfs_iunlock(dp, lock_mode);
 			if (error || !map_info->map_valid)
 				break;
 
@@ -653,7 +659,6 @@ xfs_readdir(
 	struct xfs_da_args	args = { NULL };
 	int			rval;
 	int			v;
-	uint			lock_mode;
 
 	trace_xfs_readdir(dp);
 
@@ -666,7 +671,7 @@ xfs_readdir(
 	args.dp = dp;
 	args.geo = dp->i_mount->m_dir_geo;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
+	xfs_ilock(dp, XFS_IOLOCK_SHARED);
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(&args, ctx);
 	else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
 		rval = xfs_dir2_block_getdents(&args, ctx);
 	else
 		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-	xfs_iunlock(dp, lock_mode);
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 
 	return rval;
 }

+ 1 - 1
fs/xfs/xfs_dquot.c

@@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk(
 		d->dd_diskdq.d_id = cpu_to_be32(curid);
 		d->dd_diskdq.d_flags = type;
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
 			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
 					 XFS_DQUOT_CRC_OFF);
 		}

+ 29 - 22
fs/xfs/xfs_file.c

@@ -317,24 +317,33 @@ xfs_file_read_iter(
 		return -EIO;
 
 	/*
-	 * Locking is a bit tricky here. If we take an exclusive lock
-	 * for direct IO, we effectively serialise all new concurrent
-	 * read IO to this file and block it behind IO that is currently in
-	 * progress because IO in progress holds the IO lock shared. We only
-	 * need to hold the lock exclusive to blow away the page cache, so
-	 * only take lock exclusively if the page cache needs invalidation.
-	 * This allows the normal direct IO case of no page cache pages to
-	 * proceeed concurrently without serialisation.
+	 * Locking is a bit tricky here. If we take an exclusive lock for direct
+	 * IO, we effectively serialise all new concurrent read IO to this file
+	 * and block it behind IO that is currently in progress because IO in
+	 * progress holds the IO lock shared. We only need to hold the lock
+	 * exclusive to blow away the page cache, so only take lock exclusively
+	 * if the page cache needs invalidation. This allows the normal direct
+	 * IO case of no page cache pages to proceed concurrently without
+	 * serialisation.
 	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
 		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
+		/*
+		 * The generic dio code only flushes the range of the particular
+		 * I/O. Because we take an exclusive lock here, this whole
+		 * sequence is considerably more expensive for us. This has a
+		 * noticeable performance impact for any file with cached pages,
+		 * even when outside of the range of the particular I/O.
+		 *
+		 * Hence, amortize the cost of the lock against a full file
+		 * flush and reduce the chances of repeated iolock cycles going
+		 * forward.
+		 */
 		if (inode->i_mapping->nrpages) {
-			ret = filemap_write_and_wait_range(
-							VFS_I(ip)->i_mapping,
-							pos, pos + size - 1);
+			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
 			if (ret) {
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
@@ -345,9 +354,7 @@ xfs_file_read_iter(
 			 * we fail to invalidate a page, but this should never
 			 * happen on XFS. Warn if it does fail.
 			 */
-			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					pos >> PAGE_CACHE_SHIFT,
-					(pos + size - 1) >> PAGE_CACHE_SHIFT);
+			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
 			WARN_ON_ONCE(ret);
 			ret = 0;
 		}
@@ -733,19 +740,19 @@ xfs_file_dio_aio_write(
 	pos = iocb->ki_pos;
 	end = pos + count - 1;
 
+	/*
+	 * See xfs_file_read_iter() for why we do a full-file flush here.
+	 */
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-						   pos, end);
+		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
 		if (ret)
 			goto out;
 		/*
-		 * Invalidate whole pages. This can return an error if
-		 * we fail to invalidate a page, but this should never
-		 * happen on XFS. Warn if it does fail.
+		 * Invalidate whole pages. This can return an error if we fail
+		 * to invalidate a page, but this should never happen on XFS.
+		 * Warn if it does fail.
 		 */
-		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					pos >> PAGE_CACHE_SHIFT,
-					end >> PAGE_CACHE_SHIFT);
+		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
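
Both hunks above swap ranged flush/invalidation for whole-file calls. A condensed sketch of the resulting pattern as a hypothetical helper, assuming the usual kernel headers; the real XFS paths also handle the iolock upgrade and demote around this.

#include <linux/fs.h>
#include <linux/pagemap.h>

/*
 * Sketch: flush and drop the entire page cache for a file ahead of a
 * direct I/O, amortising the cost of taking the iolock exclusively.
 */
static int dio_flush_whole_file(struct address_space *mapping)
{
	int ret;

	if (!mapping->nrpages)
		return 0;

	/* Write back all dirty pages and wait for them to complete. */
	ret = filemap_write_and_wait(mapping);
	if (ret)
		return ret;

	/*
	 * Invalidate whole pages. This can fail, but should never do so on
	 * XFS, so warn and carry on if it does.
	 */
	ret = invalidate_inode_pages2(mapping);
	WARN_ON_ONCE(ret);
	return 0;
}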

+ 3 - 3
fs/xfs/xfs_fsops.c

@@ -250,7 +250,7 @@ xfs_growfs_data_private(
 		agf->agf_freeblks = cpu_to_be32(tmpsize);
 		agf->agf_longest = cpu_to_be32(tmpsize);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
 
 		error = xfs_bwrite(bp);
 		xfs_buf_relse(bp);
@@ -273,7 +273,7 @@ xfs_growfs_data_private(
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
 			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
 			agfl->agfl_seqno = cpu_to_be32(agno);
-			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
 		}
 
 		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
@@ -309,7 +309,7 @@ xfs_growfs_data_private(
 		agi->agi_newino = cpu_to_be32(NULLAGINO);
 		agi->agi_dirino = cpu_to_be32(NULLAGINO);
 		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
 			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
 			agi->agi_free_level = cpu_to_be32(1);

+ 85 - 31
fs/xfs/xfs_inode.c

@@ -164,7 +164,7 @@ xfs_ilock(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL)
 		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL) {
 		if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
 	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 	ASSERT(lock_flags != 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL)
@@ -362,32 +362,52 @@ int xfs_lots_retries;
 int xfs_lock_delays;
 #endif
 
+#ifdef CONFIG_LOCKDEP
+static bool
+xfs_lockdep_subclass_ok(
+	int subclass)
+{
+	return subclass < MAX_LOCKDEP_SUBCLASSES;
+}
+#else
+#define xfs_lockdep_subclass_ok(subclass)	(true)
+#endif
+
 /*
  * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
+	int	class = 0;
+
+	ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+			      XFS_ILOCK_RTSUM)));
+	ASSERT(xfs_lockdep_subclass_ok(subclass));
+
 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
-		ASSERT(subclass + XFS_LOCK_INUMORDER <
-			(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+		ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+		ASSERT(xfs_lockdep_subclass_ok(subclass +
+						XFS_IOLOCK_PARENT_VAL));
+		class += subclass << XFS_IOLOCK_SHIFT;
+		if (lock_mode & XFS_IOLOCK_PARENT)
+			class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
 	}
 
 	if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
-		ASSERT(subclass + XFS_LOCK_INUMORDER <
-			(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
-							XFS_MMAPLOCK_SHIFT;
+		ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+		class += subclass << XFS_MMAPLOCK_SHIFT;
 	}
 
-	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+		ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+		class += subclass << XFS_ILOCK_SHIFT;
+	}
 
-	return lock_mode;
+	return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
 }
 
 /*
@@ -399,6 +419,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  * transaction (such as truncate). This can result in deadlock since the long
  * running trans might need to wait for the inode we just locked in order to
  * push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_inodes(
@@ -409,8 +434,29 @@ xfs_lock_inodes(
 	int		attempts = 0, i, j, try_lock;
 	xfs_log_item_t	*lp;
 
-	/* currently supports between 2 and 5 inodes */
+	/*
+	 * Currently supports between 2 and 5 inodes with exclusive locking.  We
+	 * support an arbitrary depth of locking here, but absolute limits on
+	 * inodes depend on the type of locking and the limits placed by
+	 * lockdep annotations in xfs_lock_inumorder.  These are all checked by
+	 * the asserts.
+	 */
 	ASSERT(ips && inodes >= 2 && inodes <= 5);
+	ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+			    XFS_ILOCK_EXCL));
+	ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+			      XFS_ILOCK_SHARED)));
+	ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+		inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+	ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+		inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+		inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+	if (lock_mode & XFS_IOLOCK_EXCL) {
+		ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
+		ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
 
 	try_lock = 0;
 	i = 0;
@@ -629,30 +675,29 @@ xfs_lookup(
 {
 	xfs_ino_t		inum;
 	int			error;
-	uint			lock_mode;
 
 	trace_xfs_lookup(dp, name);
 
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return -EIO;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
+	xfs_ilock(dp, XFS_IOLOCK_SHARED);
 	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
-	xfs_iunlock(dp, lock_mode);
-
 	if (error)
-		goto out;
+		goto out_unlock;
 
 	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 	if (error)
 		goto out_free_name;
 
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 	return 0;
 
 out_free_name:
 	if (ci_name)
 		kmem_free(ci_name->name);
-out:
+out_unlock:
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 	*ipp = NULL;
 	return error;
 }
@@ -787,7 +832,7 @@ xfs_ialloc(
 
 	if (ip->i_d.di_version == 3) {
 		ASSERT(ip->i_d.di_ino == ino);
-		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
+		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
 		ip->i_d.di_crc = 0;
 		ip->i_d.di_changecount = 1;
 		ip->i_d.di_lsn = 0;
@@ -1149,7 +1194,8 @@ xfs_create(
 		goto out_trans_cancel;
 
 
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
 	xfs_bmap_init(&free_list, &first_block);
@@ -1185,7 +1231,7 @@ xfs_create(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1258,7 +1304,7 @@ xfs_create(
 	xfs_qm_dqrele(pdqp);
 
 	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+		xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return error;
 }
 
@@ -1403,10 +1449,11 @@ xfs_link(
 	if (error)
 		goto error_return;
 
+	xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
 	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
 	/*
 	 * If we are using project inheritance, we only allow hard link
@@ -2510,9 +2557,10 @@ xfs_remove(
 		goto out_trans_cancel;
 	}
 
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
 	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
 
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 	/*
@@ -2893,6 +2941,12 @@ xfs_rename(
 	 * whether the target directory is the same as the source
 	 * directory, we can lock from 2 to 4 inodes.
 	 */
+	if (!new_parent)
+		xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+	else
+		xfs_lock_two_inodes(src_dp, target_dp,
+				    XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+
 	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
 
 	/*
@@ -2900,9 +2954,9 @@ xfs_rename(
 	 * we can rely on either trans_commit or trans_cancel to unlock
 	 * them.
 	 */
-	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	if (new_parent)
-		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
 	if (target_ip)
 		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
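
The new asserts encode the rule from the comment above xfs_lock_inodes(): a single exclusive lock type per call, and no more inodes than that type's lockdep subclass space allows. A hypothetical call site (not part of this patch, XFS headers assumed) showing what passes and what trips the asserts:

/* Hypothetical call site, for illustration only. */
static void
example_lock_batch(struct xfs_inode **ips, int num_inodes)
{
	/*
	 * Fine: one exclusive lock type, within the limit of
	 * XFS_ILOCK_MAX_SUBCLASS + 1 (i.e. 5) inodes per call.
	 */
	xfs_lock_inodes(ips, num_inodes, XFS_ILOCK_EXCL);

	/*
	 * These would trip the new ASSERTs: shared modes are not allowed,
	 * and neither is mixing lock types in a single call.
	 *
	 *	xfs_lock_inodes(ips, num_inodes, XFS_ILOCK_SHARED);
	 *	xfs_lock_inodes(ips, num_inodes,
	 *			XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	 */
}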

+ 59 - 26
fs/xfs/xfs_inode.h

@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * Flags for lockdep annotations.
  *
  * XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode.  The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
  *
  * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
  * inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  * with xfs_lock_inodes().  This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited to the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit		Lock Region
+ * 16-19	XFS_IOLOCK_SHIFT dependencies
+ * 20-23	XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31	XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3		subclass value
+ * 4-7		PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3		subclass value
+ * 4-7		unused
+ *
+ * ILOCK values
+ * 0-4		subclass values
+ * 5		PARENT subclass (not nestable)
+ * 6		RTBITMAP subclass (not nestable)
+ * 7		RTSUM subclass (not nestable)
+ * 
  */
-#define XFS_LOCK_PARENT		1
-#define XFS_LOCK_RTBITMAP	2
-#define XFS_LOCK_RTSUM		3
-#define XFS_LOCK_INUMORDER	4
-
-#define XFS_IOLOCK_SHIFT	16
-#define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT	20
-
-#define XFS_ILOCK_SHIFT		24
-#define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define	XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define	XFS_ILOCK_RTSUM		(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK	0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
-#define XFS_ILOCK_DEP_MASK	0xff000000
-#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT		16
+#define XFS_IOLOCK_PARENT_VAL		4
+#define XFS_IOLOCK_MAX_SUBCLASS		(XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK		0x000f0000
+#define	XFS_IOLOCK_PARENT		(XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT		20
+#define XFS_MMAPLOCK_NUMORDER		0
+#define XFS_MMAPLOCK_MAX_SUBCLASS	3
+#define XFS_MMAPLOCK_DEP_MASK		0x00f00000
+
+#define XFS_ILOCK_SHIFT			24
+#define XFS_ILOCK_PARENT_VAL		5
+#define XFS_ILOCK_MAX_SUBCLASS		(XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL		6
+#define XFS_ILOCK_RTSUM_VAL		7
+#define XFS_ILOCK_DEP_MASK		0xff000000
+#define	XFS_ILOCK_PARENT		(XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define	XFS_ILOCK_RTBITMAP		(XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define	XFS_ILOCK_RTSUM			(XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK	(XFS_IOLOCK_DEP_MASK | \
 				 XFS_MMAPLOCK_DEP_MASK | \
 				 XFS_ILOCK_DEP_MASK)
 
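
A standalone arithmetic check of the bit layout documented above; the shift and subclass values are duplicated from this header purely to show which mask bits each special class occupies.

#include <assert.h>

#define XFS_IOLOCK_SHIFT	16
#define XFS_MMAPLOCK_SHIFT	20
#define XFS_ILOCK_SHIFT		24
#define XFS_IOLOCK_PARENT_VAL	4
#define XFS_ILOCK_PARENT_VAL	5
#define XFS_ILOCK_RTBITMAP_VAL	6
#define XFS_ILOCK_RTSUM_VAL	7

int main(void)
{
	/* IOLOCK subclasses occupy bits 16-19, so PARENT lands at 0x00040000. */
	assert((XFS_IOLOCK_PARENT_VAL  << XFS_IOLOCK_SHIFT) == 0x00040000);

	/* ILOCK specials occupy bits 24-31. */
	assert((XFS_ILOCK_PARENT_VAL   << XFS_ILOCK_SHIFT) == 0x05000000);
	assert((XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT) == 0x06000000);
	assert((XFS_ILOCK_RTSUM_VAL    << XFS_ILOCK_SHIFT) == 0x07000000);

	/* An ILOCK subclass of 2 (third inode in a batch) gives 0x02000000. */
	assert((2 << XFS_ILOCK_SHIFT) == 0x02000000);
	return 0;
}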

+ 12 - 2
fs/xfs/xfs_log_recover.c

@@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn(
 		 */
 		goto recover_immediately;
 	case XFS_SB_MAGIC:
+		/*
+		 * superblock uuids are magic. We may or may not have a
+		 * sb_meta_uuid on disk, but it will be set in the in-core
+		 * superblock. We set the uuid pointer for verification
+		 * according to the superblock feature mask to ensure we check
+		 * the relevant UUID in the superblock.
+		 */
 		lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-		uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+		if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+			uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+		else
+			uuid = &((struct xfs_dsb *)blk)->sb_uuid;
 		break;
 	default:
 		break;
 	}
 
 	if (lsn != (xfs_lsn_t)-1) {
-		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
 			goto recover_immediately;
 		return lsn;
 	}

+ 4 - 0
fs/xfs/xfs_super.c

@@ -1528,6 +1528,10 @@ xfs_fs_fill_super(
 		}
 	}
 
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+		xfs_alert(mp,
+	"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;

+ 4 - 3
fs/xfs/xfs_symlink.c

@@ -240,7 +240,8 @@ xfs_symlink(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
 	/*
@@ -288,7 +289,7 @@ xfs_symlink(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
 	/*
@@ -421,7 +422,7 @@ out_release_inode:
 	xfs_qm_dqrele(pdqp);
 
 	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+		xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 	return error;
 }