@@ -36,6 +36,21 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
+#define XFS_DIO_FLAG_APPEND	(1 << 1)
+
+/*
+ * structure owned by writepages passed to individual writepage calls
+ */
+struct xfs_writepage_ctx {
+	struct xfs_bmbt_irec    imap;
+	bool			imap_valid;
+	unsigned int		io_type;
+	struct xfs_ioend	*ioend;
+	sector_t		last_block;
+};
+
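The context structure added above exists because `write_cache_pages()` makes one callback per dirty page, so any state that should survive from one page to the next (the cached extent mapping, whether it is still valid, the current I/O type, and the ioend being assembled) has to travel behind the iterator's opaque `data` pointer. A rough userspace sketch of that pattern, with invented names (`wp_ctx`, `write_one_page`, `writeback_range`) standing in for the kernel APIs:

```c
#include <stdbool.h>
#include <stdio.h>

/* models struct xfs_writepage_ctx: state cached across per-page calls */
struct wp_ctx {
	long	map_start;	/* models imap */
	long	map_len;
	bool	map_valid;	/* models imap_valid */
	long	last_block;	/* models last_block */
};

/* models xfs_do_writepage(): called once per dirty page */
static int write_one_page(long page, void *data)
{
	struct wp_ctx *ctx = data;

	/* reuse the cached mapping while the page still falls inside it */
	if (!ctx->map_valid ||
	    page < ctx->map_start || page >= ctx->map_start + ctx->map_len) {
		ctx->map_start = page;	/* stands in for xfs_map_blocks() */
		ctx->map_len = 16;
		ctx->map_valid = true;
		printf("new mapping at page %ld\n", page);
	}
	ctx->last_block = page;
	return 0;
}

/* models write_cache_pages(): the iterator is stateless, the ctx is not */
static int writeback_range(long first, long last,
			   int (*fn)(long, void *), void *data)
{
	for (long page = first; page <= last; page++) {
		int error = fn(page, data);

		if (error)
			return error;
	}
	return 0;
}

int main(void)
{
	struct wp_ctx ctx = { .map_valid = false, .last_block = -1 };

	/* 64 pages share four mappings instead of doing 64 lookups */
	return writeback_range(0, 63, write_one_page, &ctx);
}
```

The iterator itself stays stateless; everything that lets consecutive pages share one block mapping lives in the context.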
 void
 xfs_count_page_state(
 	struct page		*page,
@@ -214,10 +229,12 @@ xfs_end_io(
 	struct xfs_inode *ip = XFS_I(ioend->io_inode);
 	int		error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	/*
+	 * Set an error if the mount has shut down and proceed with end I/O
+	 * processing so it can perform whatever cleanups are necessary.
+	 */
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		ioend->io_error = -EIO;
-		goto done;
-	}
 
 	/*
 	 * For unwritten extents we need to issue transactions to convert a
@@ -265,7 +282,7 @@ xfs_alloc_ioend(
 	 */
 	atomic_set(&ioend->io_remaining, 1);
 	ioend->io_error = 0;
-	ioend->io_list = NULL;
+	INIT_LIST_HEAD(&ioend->io_list);
 	ioend->io_type = type;
 	ioend->io_inode = inode;
 	ioend->io_buffer_head = NULL;
@@ -283,8 +300,7 @@ xfs_map_blocks(
 	struct inode		*inode,
 	loff_t			offset,
 	struct xfs_bmbt_irec	*imap,
-	int			type,
-	int			nonblocking)
+	int			type)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -300,12 +316,7 @@ xfs_map_blocks(
 	if (type == XFS_IO_UNWRITTEN)
 		bmapi_flags |= XFS_BMAPI_IGSTATE;
 
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-		if (nonblocking)
-			return -EAGAIN;
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-	}
-
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
 	ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -341,7 +352,7 @@ xfs_map_blocks(
 	return 0;
 }
 
-STATIC int
+STATIC bool
 xfs_imap_valid(
 	struct inode		*inode,
 	struct xfs_bmbt_irec	*imap,
@@ -414,8 +425,7 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
 	struct page		*page,
-	int			clear_dirty,
-	int			buffers)
+	int			clear_dirty)
 {
 	ASSERT(PageLocked(page));
 	ASSERT(!PageWriteback(page));
@@ -434,10 +444,6 @@ xfs_start_page_writeback(
 		set_page_writeback_keepwrite(page);
 
 	unlock_page(page);
-
-	/* If no buffers on the page are to be written, finish it here */
-	if (!buffers)
-		end_page_writeback(page);
 }
 
 static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -446,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 }
 
 /*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
+ * Submit all of the bios for an ioend. We are only passed a single ioend at a
+ * time; the caller is responsible for chaining prior to submission.
  *
  * If @fail is non-zero, it means that we have a situation where some part of
  * the submission process has failed after we have marked paged for writeback
  * and unlocked them. In this situation, we need to fail the ioend chain rather
  * than submit it to IO. This typically only happens on a filesystem shutdown.
  */
-STATIC void
+STATIC int
 xfs_submit_ioend(
 	struct writeback_control *wbc,
 	xfs_ioend_t		*ioend,
-	int			fail)
+	int			status)
 {
-	xfs_ioend_t		*head = ioend;
-	xfs_ioend_t		*next;
 	struct buffer_head	*bh;
 	struct bio		*bio;
 	sector_t		lastblock = 0;
 
-	/* Pass 1 - start writeback */
-	do {
-		next = ioend->io_list;
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-			xfs_start_buffer_writeback(bh);
-	} while ((ioend = next) != NULL);
+	/* Reserve log space if we might write beyond the on-disk inode size. */
+	if (!status &&
+	    ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+		status = xfs_setfilesize_trans_alloc(ioend);
+	/*
+	 * If we are failing the IO now, just mark the ioend with an
+	 * error and finish it. This will run IO completion immediately
+	 * as there is only one reference to the ioend at this point in
+	 * time.
+	 */
+	if (status) {
+		ioend->io_error = status;
+		xfs_finish_ioend(ioend);
+		return status;
+	}
 
-	/* Pass 2 - submit I/O */
-	ioend = head;
-	do {
-		next = ioend->io_list;
-		bio = NULL;
+	bio = NULL;
+	for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
 
-		/*
-		 * If we are failing the IO now, just mark the ioend with an
-		 * error and finish it. This will run IO completion immediately
-		 * as there is only one reference to the ioend at this point in
-		 * time.
-		 */
-		if (fail) {
-			ioend->io_error = fail;
-			xfs_finish_ioend(ioend);
-			continue;
+		if (!bio) {
+retry:
+			bio = xfs_alloc_ioend_bio(bh);
+		} else if (bh->b_blocknr != lastblock + 1) {
+			xfs_submit_ioend_bio(wbc, ioend, bio);
+			goto retry;
 		}
 
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-			if (!bio) {
- retry:
-				bio = xfs_alloc_ioend_bio(bh);
-			} else if (bh->b_blocknr != lastblock + 1) {
-				xfs_submit_ioend_bio(wbc, ioend, bio);
-				goto retry;
-			}
-
-			if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
-				xfs_submit_ioend_bio(wbc, ioend, bio);
-				goto retry;
-			}
-
-			lastblock = bh->b_blocknr;
-		}
-		if (bio)
+		if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
 			xfs_submit_ioend_bio(wbc, ioend, bio);
-		xfs_finish_ioend(ioend);
-	} while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too. Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-	xfs_ioend_t		*ioend)
-{
-	xfs_ioend_t		*next;
-	struct buffer_head	*bh, *next_bh;
-
-	do {
-		next = ioend->io_list;
-		bh = ioend->io_buffer_head;
-		do {
-			next_bh = bh->b_private;
-			clear_buffer_async_write(bh);
-			/*
-			 * The unwritten flag is cleared when added to the
-			 * ioend. We're not submitting for I/O so mark the
-			 * buffer unwritten again for next time around.
-			 */
-			if (ioend->io_type == XFS_IO_UNWRITTEN)
-				set_buffer_unwritten(bh);
-			unlock_buffer(bh);
-		} while ((bh = next_bh) != NULL);
+			goto retry;
+		}
 
-		mempool_free(ioend, xfs_ioend_pool);
-	} while ((ioend = next) != NULL);
+		lastblock = bh->b_blocknr;
+	}
+	if (bio)
+		xfs_submit_ioend_bio(wbc, ioend, bio);
+	xfs_finish_ioend(ioend);
+	return 0;
 }
 
 /*
  * Test to see if we've been building up a completion structure for
  * earlier buffers -- if so, we try to append to this ioend if we
  * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
+ * Return the ioend we finished off so that the caller can submit it
+ * once it has finished processing the dirty page.
  */
 STATIC void
 xfs_add_to_ioend(
 	struct inode		*inode,
 	struct buffer_head	*bh,
 	xfs_off_t		offset,
-	unsigned int		type,
-	xfs_ioend_t		**result,
-	int			need_ioend)
+	struct xfs_writepage_ctx *wpc,
+	struct list_head	*iolist)
 {
-	xfs_ioend_t		*ioend = *result;
-
-	if (!ioend || need_ioend || type != ioend->io_type) {
-		xfs_ioend_t	*previous = *result;
-
-		ioend = xfs_alloc_ioend(inode, type);
-		ioend->io_offset = offset;
-		ioend->io_buffer_head = bh;
-		ioend->io_buffer_tail = bh;
-		if (previous)
-			previous->io_list = ioend;
-		*result = ioend;
+	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+	    bh->b_blocknr != wpc->last_block + 1 ||
+	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
+		struct xfs_ioend *new;
+
+		if (wpc->ioend)
+			list_add(&wpc->ioend->io_list, iolist);
+
+		new = xfs_alloc_ioend(inode, wpc->io_type);
+		new->io_offset = offset;
+		new->io_buffer_head = bh;
+		new->io_buffer_tail = bh;
+		wpc->ioend = new;
 	} else {
-		ioend->io_buffer_tail->b_private = bh;
-		ioend->io_buffer_tail = bh;
+		wpc->ioend->io_buffer_tail->b_private = bh;
+		wpc->ioend->io_buffer_tail = bh;
 	}
 
 	bh->b_private = NULL;
-	ioend->io_size += bh->b_size;
+	wpc->ioend->io_size += bh->b_size;
+	wpc->last_block = bh->b_blocknr;
+	xfs_start_buffer_writeback(bh);
 }
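The rewritten `xfs_add_to_ioend()` above appends a buffer to the cached ioend only when three things hold: the I/O type matches, the block is physically contiguous (`last_block + 1`), and the file offset is logically contiguous (`io_offset + io_size`). Otherwise the cached ioend goes onto a caller-supplied list for later submission and a fresh one is started. A standalone model of that decision, with invented types (`struct io_unit`, `add_block`) rather than the real ioend machinery:

```c
#include <stdlib.h>

struct io_unit {
	int	type;
	long	start, nblocks;	/* models io_offset / io_size */
	struct io_unit *next;	/* models the local submit list linkage */
};

struct ctx {
	struct io_unit	*cur;		/* models wpc->ioend */
	long		last_block;	/* models wpc->last_block */
	struct io_unit	**list_tail;	/* models the iolist */
};

/* append if same type and contiguous, else queue the old unit */
static void add_block(struct ctx *c, int type, long block)
{
	struct io_unit *u = c->cur;

	if (!u || u->type != type ||
	    block != c->last_block + 1 ||
	    block != u->start + u->nblocks) {
		if (u) {		/* models list_add(&ioend->io_list, iolist) */
			*c->list_tail = u;
			c->list_tail = &u->next;
		}
		u = calloc(1, sizeof(*u));
		if (!u)
			abort();
		u->type = type;
		u->start = block;
		c->cur = u;
	}
	u->nblocks++;		/* models ioend->io_size += bh->b_size */
	c->last_block = block;	/* models the wpc->last_block update */
}

int main(void)
{
	struct io_unit *list = NULL;
	struct ctx c = { .cur = NULL, .last_block = -1, .list_tail = &list };

	add_block(&c, 0, 10);
	add_block(&c, 0, 11);	/* appends: contiguous, same type */
	add_block(&c, 1, 12);	/* type change: old unit moves to the list */
	return list != NULL ? 0 : 1;
}
```

Keeping exactly one open unit at a time is what lets the real code avoid chaining ioends and hence nesting mempool allocations; everything older is already queued for submission.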
 
 STATIC void
@@ -678,183 +632,6 @@ xfs_check_page_type(
 	return false;
 }
 
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			tindex,
-	struct xfs_bmbt_irec	*imap,
-	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc)
-{
-	struct buffer_head	*bh, *head;
-	xfs_off_t		end_offset;
-	unsigned long		p_offset;
-	unsigned int		type;
-	int			len, page_dirty;
-	int			count = 0, done = 0, uptodate = 1;
-	xfs_off_t		offset = page_offset(page);
-
-	if (page->index != tindex)
-		goto fail;
-	if (!trylock_page(page))
-		goto fail;
-	if (PageWriteback(page))
-		goto fail_unlock_page;
-	if (page->mapping != inode->i_mapping)
-		goto fail_unlock_page;
-	if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
-		goto fail_unlock_page;
-
-	/*
-	 * page_dirty is initially a count of buffers on the page before
-	 * EOF and is decremented as we move each into a cleanable state.
-	 *
-	 * Derivation:
-	 *
-	 * End offset is the highest offset that this page should represent.
-	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-	 * hence give us the correct page_dirty count. On any other page,
-	 * it will be zero and in that case we need page_dirty to be the
-	 * count of buffers on the page.
-	 */
-	end_offset = min_t(unsigned long long,
-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-			i_size_read(inode));
-
-	/*
-	 * If the current map does not span the entire page we are about to try
-	 * to write, then give up. The only way we can write a page that spans
-	 * multiple mappings in a single writeback iteration is via the
-	 * xfs_vm_writepage() function. Data integrity writeback requires the
-	 * entire page to be written in a single attempt, otherwise the part of
-	 * the page we don't write here doesn't get written as part of the data
-	 * integrity sync.
-	 *
-	 * For normal writeback, we also don't attempt to write partial pages
-	 * here as it simply means that write_cache_pages() will see it under
-	 * writeback and ignore the page until some point in the future, at
-	 * which time this will be the only page in the file that needs
-	 * writeback. Hence for more optimal IO patterns, we should always
-	 * avoid partial page writeback due to multiple mappings on a page here.
-	 */
-	if (!xfs_imap_valid(inode, imap, end_offset))
-		goto fail_unlock_page;
-
-	len = 1 << inode->i_blkbits;
-	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-			 PAGE_CACHE_SIZE);
-	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-	page_dirty = p_offset / len;
-
-	/*
-	 * The moment we find a buffer that doesn't match our current type
-	 * specification or can't be written, abort the loop and start
-	 * writeback. As per the above xfs_imap_valid() check, only
-	 * xfs_vm_writepage() can handle partial page writeback fully - we are
-	 * limited here to the buffers that are contiguous with the current
-	 * ioend, and hence a buffer we can't write breaks that contiguity and
-	 * we have to defer the rest of the IO to xfs_vm_writepage().
-	 */
-	bh = head = page_buffers(page);
-	do {
-		if (offset >= end_offset)
-			break;
-		if (!buffer_uptodate(bh))
-			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-			done = 1;
-			break;
-		}
-
-		if (buffer_unwritten(bh) || buffer_delay(bh) ||
-		    buffer_mapped(bh)) {
-			if (buffer_unwritten(bh))
-				type = XFS_IO_UNWRITTEN;
-			else if (buffer_delay(bh))
-				type = XFS_IO_DELALLOC;
-			else
-				type = XFS_IO_OVERWRITE;
-
-			/*
-			 * imap should always be valid because of the above
-			 * partial page end_offset check on the imap.
-			 */
-			ASSERT(xfs_imap_valid(inode, imap, offset));
-
-			lock_buffer(bh);
-			if (type != XFS_IO_OVERWRITE)
-				xfs_map_at_offset(inode, bh, imap, offset);
-			xfs_add_to_ioend(inode, bh, offset, type,
-					 ioendp, done);
-
-			page_dirty--;
-			count++;
-		} else {
-			done = 1;
-			break;
-		}
-	} while (offset += len, (bh = bh->b_this_page) != head);
-
-	if (uptodate && bh == head)
-		SetPageUptodate(page);
-
-	if (count) {
-		if (--wbc->nr_to_write <= 0 &&
-		    wbc->sync_mode == WB_SYNC_NONE)
-			done = 1;
-	}
-	xfs_start_page_writeback(page, !page_dirty, count);
-
-	return done;
- fail_unlock_page:
-	unlock_page(page);
- fail:
-	return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-	struct inode		*inode,
-	pgoff_t			tindex,
-	struct xfs_bmbt_irec	*imap,
-	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc,
-	pgoff_t			tlast)
-{
-	struct pagevec		pvec;
-	int			done = 0, i;
-
-	pagevec_init(&pvec, 0);
-	while (!done && tindex <= tlast) {
-		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-			break;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-						imap, ioendp, wbc);
-			if (done)
-				break;
-		}
-
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-}
-
 STATIC void
 xfs_vm_invalidatepage(
 	struct page		*page,
@@ -931,6 +708,164 @@ out_invalidate:
 	return;
 }
 
+/*
+ * We implement an immediate ioend submission policy here to avoid needing to
+ * chain multiple ioends and hence nest mempool allocations which can violate
+ * forward progress guarantees we need to provide. The current ioend we are
+ * adding buffers to is cached on the writepage context, and if the new buffer
+ * does not append to the cached ioend it will create a new ioend and cache that
+ * instead.
+ *
+ * If a new ioend is created and cached, the old ioend is returned and queued
+ * locally for submission once the entire page is processed or an error has been
+ * detected. While ioends are submitted immediately after they are completed,
+ * batching optimisations are provided by higher level block plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
+ */
+static int
+xfs_writepage_map(
+	struct xfs_writepage_ctx *wpc,
+	struct writeback_control *wbc,
+	struct inode		*inode,
+	struct page		*page,
+	loff_t			offset,
+	__uint64_t		end_offset)
+{
+	LIST_HEAD(submit_list);
+	struct xfs_ioend	*ioend, *next;
+	struct buffer_head	*bh, *head;
+	ssize_t			len = 1 << inode->i_blkbits;
+	int			error = 0;
+	int			count = 0;
+	int			uptodate = 1;
+
+	bh = head = page_buffers(page);
+	offset = page_offset(page);
+	do {
+		if (offset >= end_offset)
+			break;
+		if (!buffer_uptodate(bh))
+			uptodate = 0;
+
+		/*
+		 * set_page_dirty dirties all buffers in a page, independent
+		 * of their state. The dirty state however is entirely
+		 * meaningless for holes (!mapped && uptodate), so skip
+		 * buffers covering holes here.
+		 */
+		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+			wpc->imap_valid = false;
+			continue;
+		}
+
+		if (buffer_unwritten(bh)) {
+			if (wpc->io_type != XFS_IO_UNWRITTEN) {
+				wpc->io_type = XFS_IO_UNWRITTEN;
+				wpc->imap_valid = false;
+			}
+		} else if (buffer_delay(bh)) {
+			if (wpc->io_type != XFS_IO_DELALLOC) {
+				wpc->io_type = XFS_IO_DELALLOC;
+				wpc->imap_valid = false;
+			}
+		} else if (buffer_uptodate(bh)) {
+			if (wpc->io_type != XFS_IO_OVERWRITE) {
+				wpc->io_type = XFS_IO_OVERWRITE;
+				wpc->imap_valid = false;
+			}
+		} else {
+			if (PageUptodate(page))
+				ASSERT(buffer_mapped(bh));
+			/*
+			 * This buffer is not uptodate and will not be
+			 * written to disk. Ensure that we will put any
+			 * subsequent writeable buffers into a new
+			 * ioend.
+			 */
+			wpc->imap_valid = false;
+			continue;
+		}
+
+		if (wpc->imap_valid)
+			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+							 offset);
+		if (!wpc->imap_valid) {
+			error = xfs_map_blocks(inode, offset, &wpc->imap,
+					       wpc->io_type);
+			if (error)
+				goto out;
+			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+							 offset);
+		}
+		if (wpc->imap_valid) {
+			lock_buffer(bh);
+			if (wpc->io_type != XFS_IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+			xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+			count++;
+		}
+
+	} while (offset += len, ((bh = bh->b_this_page) != head));
+
+	if (uptodate && bh == head)
+		SetPageUptodate(page);
+
+	ASSERT(wpc->ioend || list_empty(&submit_list));
+
+out:
+	/*
+	 * On error, we have to fail the ioend here because we have locked
+	 * buffers in the ioend. If we don't do this, we'll deadlock
+	 * invalidating the page as that tries to lock the buffers on the page.
+	 * Also, because we may have set pages under writeback, we have to make
+	 * sure we run IO completion to mark the error state of the IO
+	 * appropriately, so we can't cancel the ioend directly here. That means
+	 * we have to mark this page as under writeback if we included any
+	 * buffers from it in the ioend chain so that completion treats it
+	 * correctly.
+	 *
+	 * If we didn't include the page in the ioend, then on error we can
+	 * simply discard and unlock it as there are no other users of the page
+	 * or its buffers right now. The caller will still need to trigger
+	 * submission of outstanding ioends on the writepage context so they are
+	 * treated correctly on error.
+	 */
+	if (count) {
+		xfs_start_page_writeback(page, !error);
+
+		/*
+		 * Preserve the original error if there was one, otherwise catch
+		 * submission errors here and propagate into subsequent ioend
+		 * submissions.
+		 */
+		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
+			int error2;
+
+			list_del_init(&ioend->io_list);
+			error2 = xfs_submit_ioend(wbc, ioend, error);
+			if (error2 && !error)
+				error = error2;
+		}
+	} else if (error) {
+		xfs_aops_discard_page(page);
+		ClearPageUptodate(page);
+		unlock_page(page);
+	} else {
+		/*
+		 * We can end up here with no error and nothing to write if we
+		 * race with a partial page truncate on a sub-page block sized
+		 * filesystem. In that case we need to mark the page clean.
+		 */
+		xfs_start_page_writeback(page, 1);
+		end_page_writeback(page);
+	}
+
+	mapping_set_error(page->mapping, error);
+	return error;
+}
+
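The `out:` block of `xfs_writepage_map()` drains the local submit list while keeping error priority straight: the first error seen (from mapping or from an earlier submission) is preserved and passed into every later `xfs_submit_ioend()` call, so the remaining ioends are failed rather than silently written. A compact model of that propagation rule:

```c
#include <stdio.h>

/* models xfs_submit_ioend(): fails the unit instead of writing it
 * whenever a non-zero status is passed in */
static int submit(int id, int status)
{
	if (status) {
		printf("unit %d: failed with %d\n", id, status);
		return status;
	}
	printf("unit %d: submitted\n", id);
	return id == 2 ? -5 : 0;	/* pretend unit 2 hits an I/O error */
}

int main(void)
{
	int error = 0;

	/* models the list_for_each_entry_safe() drain over submit_list */
	for (int id = 0; id < 4; id++) {
		int error2 = submit(id, error);

		/* keep the first error; later units are failed, not lost */
		if (error2 && !error)
			error = error2;
	}
	printf("first error seen: %d\n", error);
	return 0;
}
```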
 /*
  * Write out a dirty page.
  *
@@ -940,22 +875,16 @@ out_invalidate:
  * For any other dirty buffer heads on the page we should flush them.
  */
 STATIC int
-xfs_vm_writepage(
+xfs_do_writepage(
 	struct page		*page,
-	struct writeback_control *wbc)
+	struct writeback_control *wbc,
+	void			*data)
 {
+	struct xfs_writepage_ctx *wpc = data;
 	struct inode		*inode = page->mapping->host;
-	struct buffer_head	*bh, *head;
-	struct xfs_bmbt_irec	imap;
-	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
-	unsigned int		type;
 	__uint64_t		end_offset;
-	pgoff_t			end_index, last_index;
-	ssize_t			len;
-	int			err, imap_valid = 0, uptodate = 1;
-	int			count = 0;
-	int			nonblocking = 0;
+	pgoff_t			end_index;
 
 	trace_xfs_writepage(inode, page, 0, 0);
 
@@ -982,12 +911,9 @@ xfs_vm_writepage(
 	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
 		goto redirty;
 
-	/* Is this page beyond the end of the file? */
-	offset = i_size_read(inode);
-	end_index = offset >> PAGE_CACHE_SHIFT;
-	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-
 	/*
+	 * Is this page beyond the end of the file?
+	 *
 	 * The page index is less than the end_index, adjust the end_offset
 	 * to the highest offset that this page should represent.
 	 * -----------------------------------------------------
@@ -998,6 +924,8 @@
 	 * |     desired writeback range    |      see else    |
 	 * ---------------------------------^------------------|
 	 */
+	offset = i_size_read(inode);
+	end_index = offset >> PAGE_CACHE_SHIFT;
 	if (page->index < end_index)
 		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
 	else {
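The diagram spanning the two hunks above reduces to a small computation: a page wholly below EOF is written in full, while the page containing EOF is clamped to `i_size`. A sketch of just that arithmetic, assuming a 4k page; the kernel's else-branch additionally handles truncate races, which are omitted here:

```c
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* highest file offset this page should represent, per the diagram above */
static unsigned long page_end_offset(unsigned long index, unsigned long isize)
{
	unsigned long end_index = isize >> PAGE_SHIFT;

	if (index < end_index)			/* whole page inside i_size */
		return (index + 1) << PAGE_SHIFT;
	return isize;				/* partial page at EOF */
}

int main(void)
{
	unsigned long isize = 3 * PAGE_SIZE + 100;	/* EOF inside page 3 */

	printf("%lu\n", page_end_offset(2, isize));	/* 12288: full page */
	printf("%lu\n", page_end_offset(3, isize));	/* 12388: clamped */
	return 0;
}
```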
@@ -1049,152 +977,7 @@
 		end_offset = offset;
 	}
 
-	len = 1 << inode->i_blkbits;
-
-	bh = head = page_buffers(page);
-	offset = page_offset(page);
-	type = XFS_IO_OVERWRITE;
-
-	if (wbc->sync_mode == WB_SYNC_NONE)
-		nonblocking = 1;
-
-	do {
-		int new_ioend = 0;
-
-		if (offset >= end_offset)
-			break;
-		if (!buffer_uptodate(bh))
-			uptodate = 0;
-
-		/*
-		 * set_page_dirty dirties all buffers in a page, independent
-		 * of their state. The dirty state however is entirely
-		 * meaningless for holes (!mapped && uptodate), so skip
-		 * buffers covering holes here.
-		 */
-		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-			imap_valid = 0;
-			continue;
-		}
-
-		if (buffer_unwritten(bh)) {
-			if (type != XFS_IO_UNWRITTEN) {
-				type = XFS_IO_UNWRITTEN;
-				imap_valid = 0;
-			}
-		} else if (buffer_delay(bh)) {
-			if (type != XFS_IO_DELALLOC) {
-				type = XFS_IO_DELALLOC;
-				imap_valid = 0;
-			}
-		} else if (buffer_uptodate(bh)) {
-			if (type != XFS_IO_OVERWRITE) {
-				type = XFS_IO_OVERWRITE;
-				imap_valid = 0;
-			}
-		} else {
-			if (PageUptodate(page))
-				ASSERT(buffer_mapped(bh));
-			/*
-			 * This buffer is not uptodate and will not be
-			 * written to disk. Ensure that we will put any
-			 * subsequent writeable buffers into a new
-			 * ioend.
-			 */
-			imap_valid = 0;
-			continue;
-		}
-
-		if (imap_valid)
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-		if (!imap_valid) {
-			/*
-			 * If we didn't have a valid mapping then we need to
-			 * put the new mapping into a separate ioend structure.
-			 * This ensures non-contiguous extents always have
-			 * separate ioends, which is particularly important
-			 * for unwritten extent conversion at I/O completion
-			 * time.
-			 */
-			new_ioend = 1;
-			err = xfs_map_blocks(inode, offset, &imap, type,
-					     nonblocking);
-			if (err)
-				goto error;
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-		}
-		if (imap_valid) {
-			lock_buffer(bh);
-			if (type != XFS_IO_OVERWRITE)
-				xfs_map_at_offset(inode, bh, &imap, offset);
-			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-					 new_ioend);
-			count++;
-		}
-
-		if (!iohead)
-			iohead = ioend;
-
-	} while (offset += len, ((bh = bh->b_this_page) != head));
-
-	if (uptodate && bh == head)
-		SetPageUptodate(page);
-
-	xfs_start_page_writeback(page, 1, count);
-
-	/* if there is no IO to be submitted for this page, we are done */
-	if (!ioend)
-		return 0;
-
-	ASSERT(iohead);
-
-	/*
-	 * Any errors from this point onwards need tobe reported through the IO
-	 * completion path as we have marked the initial page as under writeback
-	 * and unlocked it.
-	 */
-	if (imap_valid) {
-		xfs_off_t		end_index;
-
-		end_index = imap.br_startoff + imap.br_blockcount;
-
-		/* to bytes */
-		end_index <<= inode->i_blkbits;
-
-		/* to pages */
-		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-		/* check against file size */
-		if (end_index > last_index)
-			end_index = last_index;
-
-		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-				  wbc, end_index);
-	}
-
-
-	/*
-	 * Reserve log space if we might write beyond the on-disk inode size.
-	 */
-	err = 0;
-	if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
-		err = xfs_setfilesize_trans_alloc(ioend);
-
-	xfs_submit_ioend(wbc, iohead, err);
-
-	return 0;
-
-error:
-	if (iohead)
-		xfs_cancel_ioend(iohead);
-
-	if (err == -EAGAIN)
-		goto redirty;
-
-	xfs_aops_discard_page(page);
-	ClearPageUptodate(page);
-	unlock_page(page);
-	return err;
+	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
 
 redirty:
 	redirty_page_for_writepage(wbc, page);
@@ -1202,17 +985,41 @@ redirty:
 	return 0;
 }
 
+STATIC int
+xfs_vm_writepage(
+	struct page		*page,
+	struct writeback_control *wbc)
+{
+	struct xfs_writepage_ctx wpc = {
+		.io_type = XFS_IO_INVALID,
+	};
+	int			ret;
+
+	ret = xfs_do_writepage(page, wbc, &wpc);
+	if (wpc.ioend)
+		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+	return ret;
+}
+
 STATIC int
 xfs_vm_writepages(
 	struct address_space	*mapping,
 	struct writeback_control *wbc)
 {
+	struct xfs_writepage_ctx wpc = {
+		.io_type = XFS_IO_INVALID,
+	};
+	int			ret;
+
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
 	if (dax_mapping(mapping))
 		return dax_writeback_mapping_range(mapping,
 				xfs_find_bdev_for_inode(mapping->host), wbc);
 
-	return generic_writepages(mapping, wbc);
+	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
+	if (wpc.ioend)
+		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+	return ret;
 }
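Both wrappers above share one obligation: when the iteration ends, the writepage context still caches one unsubmitted ioend, and it must be flushed with the loop's status folded into the final return value. A schematic of that init/iterate/flush shape, with invented names (`wctx`, `do_one`, `flush`):

```c
#include <stdio.h>

struct wctx {
	int open_unit;		/* models wpc.ioend: < 0 means none cached */
};

static int do_one(struct wctx *c, int unit)
{
	c->open_unit = unit;	/* leaves the last unit cached, unsubmitted */
	return 0;
}

/* models the trailing xfs_submit_ioend(wbc, wpc.ioend, ret) call */
static int flush(struct wctx *c, int ret)
{
	if (c->open_unit >= 0) {
		printf("submitting trailing unit %d (status %d)\n",
		       c->open_unit, ret);
		c->open_unit = -1;
	}
	return ret;
}

int main(void)
{
	struct wctx c = { .open_unit = -1 };
	int ret = 0;

	for (int u = 0; u < 3 && !ret; u++)
		ret = do_one(&c, u);

	/* the trailing cached unit must be flushed even on error */
	return flush(&c, ret);
}
```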
 
 /*
@@ -1242,27 +1049,8 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
  *
  * Note that for DIO, an IO to the highest supported file block offset (i.e.
  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1270,68 +1058,26 @@ xfs_vm_releasepage(
  * extending the file size. We won't know for sure until IO completion is run
  * and the actual max write offset is communicated to the IO completion
  * routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
  */
-
 static void
 xfs_map_direct(
 	struct inode		*inode,
 	struct buffer_head	*bh_result,
 	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset,
-	bool			dax_fault)
+	xfs_off_t		offset)
 {
-	struct xfs_ioend	*ioend;
+	uintptr_t		*flags = (uintptr_t *)&bh_result->b_private;
 	xfs_off_t		size = bh_result->b_size;
-	int			type;
-
-	if (ISUNWRITTEN(imap))
-		type = XFS_IO_UNWRITTEN;
-	else
-		type = XFS_IO_OVERWRITE;
 
-	trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
-	if (dax_fault) {
-		ASSERT(type == XFS_IO_OVERWRITE);
-		trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-					    imap);
-		return;
-	}
+	trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+		ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
 
-	if (bh_result->b_private) {
-		ioend = bh_result->b_private;
-		ASSERT(ioend->io_size > 0);
-		ASSERT(offset >= ioend->io_offset);
-		if (offset + size > ioend->io_offset + ioend->io_size)
-			ioend->io_size = offset - ioend->io_offset + size;
-
-		if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
-			ioend->io_type = XFS_IO_UNWRITTEN;
-
-		trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
-					      ioend->io_size, ioend->io_type,
-					      imap);
-	} else if (type == XFS_IO_UNWRITTEN ||
-		   offset + size > i_size_read(inode) ||
-		   offset + size < 0) {
-		ioend = xfs_alloc_ioend(inode, type);
-		ioend->io_offset = offset;
-		ioend->io_size = size;
-
-		bh_result->b_private = ioend;
+	if (ISUNWRITTEN(imap)) {
+		*flags |= XFS_DIO_FLAG_UNWRITTEN;
+		set_buffer_defer_completion(bh_result);
+	} else if (offset + size > i_size_read(inode) || offset + size < 0) {
+		*flags |= XFS_DIO_FLAG_APPEND;
 		set_buffer_defer_completion(bh_result);
-
-		trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
-					   imap);
-	} else {
-		trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-					    imap);
 	}
 }
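Rather than allocating an ioend per DIO mapping, the rewritten `xfs_map_direct()` stores bit flags directly in `b_private`, which is simply a `void *` wide enough to hold a `uintptr_t`. A self-contained illustration of that encode/decode trick, with generic names in place of the kernel's buffer_head:

```c
#include <assert.h>
#include <stdint.h>

#define DIO_UNWRITTEN	(1 << 0)	/* models XFS_DIO_FLAG_UNWRITTEN */
#define DIO_APPEND	(1 << 1)	/* models XFS_DIO_FLAG_APPEND */

struct buf {
	void *private;		/* models bh_result->b_private */
};

/* write a flag into the pointer-sized field, as xfs_map_direct() does */
static void set_flag(struct buf *b, uintptr_t flag)
{
	uintptr_t *flags = (uintptr_t *)&b->private;

	*flags |= flag;
}

int main(void)
{
	struct buf b = { 0 };

	set_flag(&b, DIO_UNWRITTEN);

	/* completion side: recover the flags from the void pointer */
	uintptr_t flags = (uintptr_t)b.private;

	assert(flags & DIO_UNWRITTEN);
	assert(!(flags & DIO_APPEND));
	return 0;
}
```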
 
@@ -1502,9 +1248,12 @@ __xfs_get_blocks(
 		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
-		if (create && direct)
-			xfs_map_direct(inode, bh_result, &imap, offset,
-				       dax_fault);
+		if (create && direct) {
+			if (dax_fault)
+				ASSERT(!ISUNWRITTEN(&imap));
+			else
+				xfs_map_direct(inode, bh_result, &imap, offset);
+		}
 	}
 
 	/*
@@ -1574,42 +1323,50 @@ xfs_get_blocks_dax_fault(
 	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
 }
 
-static void
-__xfs_end_io_direct_write(
-	struct inode		*inode,
-	struct xfs_ioend	*ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do. If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+	struct kiocb		*iocb,
 	loff_t			offset,
-	ssize_t			size)
+	ssize_t			size,
+	void			*private)
 {
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	uintptr_t		flags = (uintptr_t)private;
+	int			error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
-		goto out_end_io;
+	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	/*
-	 * dio completion end_io functions are only called on writes if more
-	 * than 0 bytes was written.
-	 */
-	ASSERT(size > 0);
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
 
-	/*
-	 * The ioend only maps whole blocks, while the IO may be sector aligned.
-	 * Hence the ioend offset/size may not match the IO offset/size exactly.
-	 * Because we don't map overwrites within EOF into the ioend, the offset
-	 * may not match, but only if the endio spans EOF. Either way, write
-	 * the IO sizes into the ioend so that completion processing does the
-	 * right thing.
-	 */
-	ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
-	ioend->io_size = size;
-	ioend->io_offset = offset;
+	if (size <= 0)
+		return size;
 
 	/*
-	 * The ioend tells us whether we are doing unwritten extent conversion
+	 * The flags tell us whether we are doing unwritten extent conversions
 	 * or an append transaction that updates the on-disk file size. These
 	 * cases are the only cases where we should *potentially* be needing
 	 * to update the VFS inode size.
-	 *
+	 */
+	if (flags == 0) {
+		ASSERT(offset + size <= i_size_read(inode));
+		return 0;
+	}
+
+	/*
 	 * We need to update the in-core inode size here so that we don't end up
 	 * with the on-disk inode size being outside the in-core inode size. We
 	 * have no other method of updating EOF for AIO, so always do it here
@@ -1620,91 +1377,56 @@ __xfs_end_io_direct_write(
 	 * here can result in EOF moving backwards and Bad Things Happen when
 	 * that occurs.
 	 */
-	spin_lock(&XFS_I(inode)->i_flags_lock);
+	spin_lock(&ip->i_flags_lock);
 	if (offset + size > i_size_read(inode))
 		i_size_write(inode, offset + size);
-	spin_unlock(&XFS_I(inode)->i_flags_lock);
+	spin_unlock(&ip->i_flags_lock);
 
-	/*
-	 * If we are doing an append IO that needs to update the EOF on disk,
-	 * do the transaction reserve now so we can use common end io
-	 * processing. Stashing the error (if there is one) in the ioend will
-	 * result in the ioend processing passing on the error if it is
-	 * possible as we can't return it from here.
-	 */
-	if (ioend->io_type == XFS_IO_OVERWRITE)
-		ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
-out_end_io:
-	xfs_end_io(&ioend->io_work);
-	return;
-}
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+	} else if (flags & XFS_DIO_FLAG_APPEND) {
+		struct xfs_trans *tp;
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
-	loff_t			offset,
-	ssize_t			size,
-	void			*private)
-{
-	struct inode		*inode = file_inode(iocb->ki_filp);
-	struct xfs_ioend	*ioend = private;
-
-	trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
-				     ioend ? ioend->io_type : 0, NULL);
+		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-	if (!ioend) {
-		ASSERT(offset + size <= i_size_read(inode));
-		return;
+		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+		if (error) {
+			xfs_trans_cancel(tp);
+			return error;
+		}
+		error = xfs_setfilesize(ip, tp, offset, size);
 	}
 
-	__xfs_end_io_direct_write(inode, ioend, offset, size);
+	return error;
 }
 
-static inline ssize_t
-xfs_vm_do_dio(
-	struct inode		*inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
 	struct kiocb		*iocb,
 	struct iov_iter		*iter,
-	loff_t			offset,
-	void			(*endio)(struct kiocb	*iocb,
-					 loff_t		offset,
-					 ssize_t	size,
-					 void		*private),
-	int			flags)
+	loff_t			offset)
 {
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
+	dio_iodone_t		*endio = NULL;
+	int			flags = 0;
 	struct block_device	*bdev;
 
-	if (IS_DAX(inode))
+	if (iov_iter_rw(iter) == WRITE) {
+		endio = xfs_end_io_direct_write;
+		flags = DIO_ASYNC_EXTEND;
+	}
+
+	if (IS_DAX(inode)) {
 		return dax_do_io(iocb, inode, iter, offset,
 				 xfs_get_blocks_direct, endio, 0);
+	}
 
 	bdev = xfs_find_bdev_for_inode(inode);
 	return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-			xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-	struct kiocb		*iocb,
-	struct iov_iter		*iter,
-	loff_t			offset)
-{
-	struct inode		*inode = iocb->ki_filp->f_mapping->host;
-
-	if (iov_iter_rw(iter) == WRITE)
-		return xfs_vm_do_dio(inode, iocb, iter, offset,
-				     xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
-	return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
+			xfs_get_blocks_direct, endio, NULL, flags);
 }
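The completion side recovers those flags and dispatches on them: no flags means an overwrite wholly inside EOF with nothing to do, and the unwritten case is checked before the append case, mirroring the if/else chain above. A schematic dispatcher, with stand-in helpers (`convert_unwritten`, `update_disk_size`) in place of `xfs_iomap_write_unwritten()` and `xfs_setfilesize()`:

```c
#include <stdio.h>
#include <stdint.h>

#define DIO_UNWRITTEN	(1 << 0)
#define DIO_APPEND	(1 << 1)

static int convert_unwritten(long off, long len)	/* stand-in helper */
{
	printf("convert unwritten [%ld, +%ld)\n", off, len);
	return 0;
}

static int update_disk_size(long off, long len)		/* stand-in helper */
{
	printf("extend on-disk size to %ld\n", off + len);
	return 0;
}

/* models xfs_end_io_direct_write() after the shutdown and size checks */
static int dio_complete(uintptr_t flags, long off, long len)
{
	if (flags == 0)			/* overwrite inside EOF: nothing to do */
		return 0;
	if (flags & DIO_UNWRITTEN)	/* checked first, as in the patch */
		return convert_unwritten(off, len);
	if (flags & DIO_APPEND)
		return update_disk_size(off, len);
	return 0;
}

int main(void)
{
	return dio_complete(DIO_APPEND, 4096, 4096);
}
```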
 
 /*
@@ -1756,6 +1478,7 @@ xfs_vm_write_failed(
 	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
 	loff_t			to = from + len;
 	struct buffer_head	*bh, *head;
+	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
 
 	/*
 	 * The request pos offset might be 32 or 64 bit, this is all fine
@@ -1787,14 +1510,23 @@ xfs_vm_write_failed(
 		if (block_start >= to)
 			break;
 
-		if (!buffer_delay(bh))
+		/*
+		 * Process delalloc and unwritten buffers beyond EOF. We can
+		 * encounter unwritten buffers in the event that a file has
+		 * post-EOF unwritten extents and an extending write happens to
+		 * fail (e.g., an unaligned write that also involves a delalloc
+		 * to the same page).
+		 */
+		if (!buffer_delay(bh) && !buffer_unwritten(bh))
 			continue;
 
-		if (!buffer_new(bh) && block_offset < i_size_read(inode))
+		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
+		    block_offset < i_size_read(inode))
 			continue;
 
-		xfs_vm_kill_delalloc_range(inode, block_offset,
-					   block_offset + bh->b_size);
+		if (buffer_delay(bh))
+			xfs_vm_kill_delalloc_range(inode, block_offset,
+						   block_offset + bh->b_size);
 
 		/*
 		 * This buffer does not contain data anymore. make sure anyone
@@ -1805,6 +1537,7 @@ xfs_vm_write_failed(
 		clear_buffer_mapped(bh);
 		clear_buffer_new(bh);
 		clear_buffer_dirty(bh);
+		clear_buffer_unwritten(bh);
 	}
 
 }
@@ -1828,6 +1561,7 @@ xfs_vm_write_begin(
 	pgoff_t			index = pos >> PAGE_CACHE_SHIFT;
 	struct page		*page;
 	int			status;
+	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
 
 	ASSERT(len <= PAGE_CACHE_SIZE);
 
@@ -1836,6 +1570,8 @@ xfs_vm_write_begin(
 		return -ENOMEM;
 
 	status = __block_write_begin(page, pos, len, xfs_get_blocks);
+	if (xfs_mp_fail_writes(mp))
+		status = -EIO;
 	if (unlikely(status)) {
 		struct inode	*inode = mapping->host;
 		size_t		isize = i_size_read(inode);
@@ -1848,6 +1584,8 @@ xfs_vm_write_begin(
 	 * allocated in this write, not blocks that were previously
 	 * written successfully.
 	 */
+	if (xfs_mp_fail_writes(mp))
+		isize = 0;
 	if (pos + len > isize) {
 		ssize_t start = max_t(ssize_t, pos, isize);
 
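The `xfs_mp_fail_writes()` checks in the last few hunks are a debug-only fault-injection knob: forcing `-EIO` out of write_begin, and treating `isize` as zero, exercises the `xfs_vm_write_failed()` cleanup paths on demand. The general shape of such a knob, reduced to standard C:

```c
#include <stdbool.h>
#include <stdio.h>

static bool fail_writes;	/* models the per-mount debug knob */

static int write_begin(long pos, long len)
{
	int status = 0;		/* pretend the real work succeeded */

	if (fail_writes)	/* injected failure, as in the patch */
		status = -5;	/* stands in for -EIO */
	if (status)
		printf("write_begin(%ld, %ld) -> cleanup path\n", pos, len);
	return status;
}

int main(void)
{
	fail_writes = true;
	return write_begin(0, 4096) ? 1 : 0;
}
```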