
Merge tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "xfs: great scads of new stuff for 4.15.

  This merge cycle, we're making some substantive changes to XFS: the
  in-core extent mappings have been refactored to use proper iterators
  and a btree so that heavily fragmented files no longer require
  high-order memory allocations; there are some important log recovery
  bug fixes; and the first part of the online fsck functionality lands.

  (The online fsck feature is disabled by default and more pieces of it
  will be coming in future release cycles.)

  This giant pile of patches has been run through a full xfstests run
  over the weekend and through a quick xfstests run against this
  morning's master, with no major failures reported.

  New in this version:

   - Refactor the incore extent map manipulations to use a cursor
     instead of directly modifying extent data.

   - Refactor the incore extent map cursor to use an in-memory btree
     instead of a single high-order allocation. This eliminates a major
     source of complaints about insufficient memory when opening a
     heavily fragmented file on a system whose memory is also heavily
     fragmented. (A minimal sketch of the new cursor API follows this
     list.)

   - Fix a longstanding bug where deleting a file with a complex
     extended attribute btree incorrectly handled memory pointers, which
     could lead to memory corruption.

   - Improve metadata validation to eliminate crashing problems found
     while fuzzing xfs.

   - Move the error injection tag definitions into libxfs to be shared
     with userspace components.

   - Fix some log recovery bugs where we'd underflow the log block
     position vector and incorrectly fail log recovery.

   - Drain the buffer lru after log recovery to force recovered buffers
     back through the verifiers after mount. On a v4 filesystem the log
     never attaches verifiers during log replay (v5 does), so we could
     end up with buffers that are marked verified but were never
     actually verified.

   - Fix various other bugs.

   - Introduce the first part of a new online fsck tool. The new fsck
     tool will be able to iterate every piece of metadata in the
     filesystem to look for obvious errors and corruptions. In the next
     release cycle the checking will be extended to cross-reference with
     the other fs metadata, so in the meantime this feature should only
     be used by developers"
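
For readers skimming the diff: the first two items replace direct
indexing of the in-core extent array (an integer index plus
xfs_iext_get_ext()) with an opaque cursor over the new in-memory btree.
Below is a minimal sketch of a walk under the new API, modeled on the
for_each_xfs_iext() users in this merge; the function name is
hypothetical and not part of the commit:

	/* Count the real (non-delalloc) extents in an inode fork. */
	static void
	xfs_count_real_extents(
		struct xfs_ifork	*ifp,
		xfs_extnum_t		*nr)
	{
		struct xfs_iext_cursor	icur;
		struct xfs_bmbt_irec	got;

		*nr = 0;
		for_each_xfs_iext(ifp, &icur, &got) {
			if (!isnullstartblock(got.br_startblock))
				(*nr)++;
		}
	}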

* tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (131 commits)
  xfs: on failed mount, force-reclaim inodes after unmounting quota controls
  xfs: check the uniqueness of the AGFL entries
  xfs: remove u_int* type usage
  xfs: handle zero entries case in xfs_iext_rebalance_leaf
  xfs: add comments documenting the rebalance algorithm
  xfs: trivial indentation fixup for xfs_iext_remove_node
  xfs: remove a superflous assignment in xfs_iext_remove_node
  xfs: add some comments to xfs_iext_insert/xfs_iext_insert_node
  xfs: fix number of records handling in xfs_iext_split_leaf
  fs/xfs: Remove NULL check before kmem_cache_destroy
  xfs: only check da node header padding on v5 filesystems
  xfs: fix btree scrub deref check
  xfs: fix uninitialized return values in scrub code
  xfs: pass inode number to xfs_scrub_ino_set_{preen,warning}
  xfs: refactor the directory data block bestfree checks
  xfs: mark xlog_verify_dest_ptr STATIC
  xfs: mark xlog_recover_check_summary STATIC
  xfs: mark xfs_btree_check_lblock and xfs_btree_check_ptr static
  xfs: remove unreachable error injection code in xfs_qm_dqget
  xfs: remove unused debug counts for xfs_lock_inodes
  ...
Linus Torvalds, 7 years ago
parent
commit 808eb24e0e
87 changed files with 10,972 additions and 4,011 deletions
  1. fs/xfs/Kconfig (+17 -0)
  2. fs/xfs/Makefile (+29 -0)
  3. fs/xfs/kmem.h (+1 -2)
  4. fs/xfs/libxfs/xfs_ag_resv.c (+1 -0)
  5. fs/xfs/libxfs/xfs_alloc.c (+50 -0)
  6. fs/xfs/libxfs/xfs_alloc.h (+4 -0)
  7. fs/xfs/libxfs/xfs_attr_leaf.c (+1 -5)
  8. fs/xfs/libxfs/xfs_bmap.c (+815 -1246)
  9. fs/xfs/libxfs/xfs_bmap.h (+31 -35)
  10. fs/xfs/libxfs/xfs_bmap_btree.c (+29 -221)
  11. fs/xfs/libxfs/xfs_bmap_btree.h (+4 -18)
  12. fs/xfs/libxfs/xfs_btree.c (+149 -110)
  13. fs/xfs/libxfs/xfs_btree.h (+30 -2)
  14. fs/xfs/libxfs/xfs_da_btree.c (+21 -1)
  15. fs/xfs/libxfs/xfs_dir2.c (+7 -17)
  16. fs/xfs/libxfs/xfs_dir2.h (+17 -0)
  17. fs/xfs/libxfs/xfs_errortag.h (+106 -0)
  18. fs/xfs/libxfs/xfs_format.h (+10 -27)
  19. fs/xfs/libxfs/xfs_fs.h (+77 -0)
  20. fs/xfs/libxfs/xfs_ialloc.c (+91 -0)
  21. fs/xfs/libxfs/xfs_ialloc.h (+7 -0)
  22. fs/xfs/libxfs/xfs_iext_tree.c (+1043 -0)
  23. fs/xfs/libxfs/xfs_inode_buf.c (+1 -0)
  24. fs/xfs/libxfs/xfs_inode_fork.c (+66 -1267)
  25. fs/xfs/libxfs/xfs_inode_fork.h (+65 -73)
  26. fs/xfs/libxfs/xfs_log_format.h (+12 -12)
  27. fs/xfs/libxfs/xfs_refcount.c (+1 -0)
  28. fs/xfs/libxfs/xfs_rmap.c (+1 -0)
  29. fs/xfs/libxfs/xfs_rtbitmap.c (+12 -1)
  30. fs/xfs/libxfs/xfs_types.h (+22 -0)
  31. fs/xfs/scrub/agheader.c (+658 -0)
  32. fs/xfs/scrub/alloc.c (+102 -0)
  33. fs/xfs/scrub/attr.c (+471 -0)
  34. fs/xfs/scrub/bmap.c (+363 -0)
  35. fs/xfs/scrub/btree.c (+516 -0)
  36. fs/xfs/scrub/btree.h (+57 -0)
  37. fs/xfs/scrub/common.c (+574 -0)
  38. fs/xfs/scrub/common.h (+144 -0)
  39. fs/xfs/scrub/dabtree.c (+591 -0)
  40. fs/xfs/scrub/dabtree.h (+59 -0)
  41. fs/xfs/scrub/dir.c (+816 -0)
  42. fs/xfs/scrub/ialloc.c (+337 -0)
  43. fs/xfs/scrub/inode.c (+611 -0)
  44. fs/xfs/scrub/parent.c (+317 -0)
  45. fs/xfs/scrub/quota.c (+304 -0)
  46. fs/xfs/scrub/refcount.c (+99 -0)
  47. fs/xfs/scrub/rmap.c (+138 -0)
  48. fs/xfs/scrub/rtbitmap.c (+108 -0)
  49. fs/xfs/scrub/scrub.c (+392 -0)
  50. fs/xfs/scrub/scrub.h (+115 -0)
  51. fs/xfs/scrub/symlink.c (+92 -0)
  52. fs/xfs/scrub/trace.c (+59 -0)
  53. fs/xfs/scrub/trace.h (+499 -0)
  54. fs/xfs/scrub/xfs_scrub.h (+29 -0)
  55. fs/xfs/xfs.h (+0 -1)
  56. fs/xfs/xfs_attr.h (+4 -1)
  57. fs/xfs/xfs_attr_inactive.c (+33 -36)
  58. fs/xfs/xfs_attr_list.c (+106 -55)
  59. fs/xfs/xfs_bmap_util.c (+304 -442)
  60. fs/xfs/xfs_bmap_util.h (+7 -3)
  61. fs/xfs/xfs_buf.c (+16 -0)
  62. fs/xfs/xfs_buf.h (+1 -4)
  63. fs/xfs/xfs_dir2_readdir.c (+5 -5)
  64. fs/xfs/xfs_dquot.c (+2 -19)
  65. fs/xfs/xfs_error.c (+5 -1)
  66. fs/xfs/xfs_error.h (+0 -81)
  67. fs/xfs/xfs_file.c (+1 -1)
  68. fs/xfs/xfs_icache.c (+1 -1)
  69. fs/xfs/xfs_inode.c (+6 -27)
  70. fs/xfs/xfs_inode.h (+2 -2)
  71. fs/xfs/xfs_inode_item.c (+7 -22)
  72. fs/xfs/xfs_inode_item.h (+1 -1)
  73. fs/xfs/xfs_ioctl.c (+93 -65)
  74. fs/xfs/xfs_ioctl.h (+2 -2)
  75. fs/xfs/xfs_ioctl32.c (+1 -0)
  76. fs/xfs/xfs_iomap.c (+9 -6)
  77. fs/xfs/xfs_iops.c (+21 -31)
  78. fs/xfs/xfs_itable.c (+1 -12)
  79. fs/xfs/xfs_itable.h (+0 -2)
  80. fs/xfs/xfs_linux.h (+9 -12)
  81. fs/xfs/xfs_log.c (+31 -2)
  82. fs/xfs/xfs_log_recover.c (+32 -30)
  83. fs/xfs/xfs_mount.c (+13 -2)
  84. fs/xfs/xfs_reflink.c (+52 -56)
  85. fs/xfs/xfs_rtalloc.h (+2 -0)
  86. fs/xfs/xfs_trace.h (+15 -49)
  87. fs/xfs/xfs_trans_ail.c (+19 -3)

+ 17 - 0
fs/xfs/Kconfig

@@ -71,6 +71,23 @@ config XFS_RT
 
 	  If unsure, say N.
 
+config XFS_ONLINE_SCRUB
+	bool "XFS online metadata check support"
+	default n
+	depends on XFS_FS
+	help
+	  If you say Y here you will be able to check metadata on a
+	  mounted XFS filesystem.  This feature is intended to reduce
+	  filesystem downtime by supplementing xfs_repair.  The key
+	  advantage here is to look for problems proactively so that
+	  they can be dealt with in a controlled manner.
+
+	  This feature is considered EXPERIMENTAL.  Use with caution!
+
+	  See the xfs_scrub man page in section 8 for additional information.
+
+	  If unsure, say N.
+
 config XFS_WARN
 	bool "XFS Verbose Warnings"
 	depends on XFS_FS && !XFS_DEBUG
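
The scrub code is compiled out entirely unless the new option is
enabled. A .config fragment for a test kernel might look like the
following (illustrative only, not part of this commit):

	CONFIG_XFS_FS=y
	CONFIG_XFS_ONLINE_SCRUB=y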

+ 29 - 0
fs/xfs/Makefile

@@ -49,6 +49,7 @@ xfs-y				+= $(addprefix libxfs/, \
 				   xfs_dquot_buf.o \
 				   xfs_ialloc.o \
 				   xfs_ialloc_btree.o \
+				   xfs_iext_tree.o \
 				   xfs_inode_fork.o \
 				   xfs_inode_buf.o \
 				   xfs_log_rlimit.o \
@@ -135,3 +136,31 @@ xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
 xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
 xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)	+= xfs_pnfs.o
+
+# online scrub/repair
+ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
+
+# Tracepoints like to blow up, so build that before everything else
+
+xfs-y				+= $(addprefix scrub/, \
+				   trace.o \
+				   agheader.o \
+				   alloc.o \
+				   attr.o \
+				   bmap.o \
+				   btree.o \
+				   common.o \
+				   dabtree.o \
+				   dir.o \
+				   ialloc.o \
+				   inode.o \
+				   parent.o \
+				   refcount.o \
+				   rmap.o \
+				   scrub.o \
+				   symlink.o \
+				   )
+
+xfs-$(CONFIG_XFS_RT)		+= scrub/rtbitmap.o
+xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o
+endif

+ 1 - 2
fs/xfs/kmem.h

@@ -119,8 +119,7 @@ kmem_zone_free(kmem_zone_t *zone, void *ptr)
 static inline void
 kmem_zone_destroy(kmem_zone_t *zone)
 {
-	if (zone)
-		kmem_cache_destroy(zone);
+	kmem_cache_destroy(zone);
 }
 
 extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
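
(The NULL check above is safe to drop because kmem_cache_destroy()
itself returns early when passed a NULL cache pointer, making the
wrapper's test redundant.)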

+ 1 - 0
fs/xfs/libxfs/xfs_ag_resv.c

@@ -27,6 +27,7 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_alloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"

+ 50 - 0
fs/xfs/libxfs/xfs_alloc.c

@@ -31,6 +31,7 @@
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
@@ -2931,3 +2932,52 @@ xfs_alloc_query_all(
 	query.fn = fn;
 	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
 }
+
+/* Find the size of the AG, in blocks. */
+xfs_agblock_t
+xfs_ag_block_count(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	ASSERT(agno < mp->m_sb.sb_agcount);
+
+	if (agno < mp->m_sb.sb_agcount - 1)
+		return mp->m_sb.sb_agblocks;
+	return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
+}
+
+/*
+ * Verify that an AG block number pointer neither points outside the AG
+ * nor points at static metadata.
+ */
+bool
+xfs_verify_agbno(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno)
+{
+	xfs_agblock_t		eoag;
+
+	eoag = xfs_ag_block_count(mp, agno);
+	if (agbno >= eoag)
+		return false;
+	if (agbno <= XFS_AGFL_BLOCK(mp))
+		return false;
+	return true;
+}
+
+/*
+ * Verify that an FS block number pointer neither points outside the
+ * filesystem nor points at static AG metadata.
+ */
+bool
+xfs_verify_fsbno(
+	struct xfs_mount	*mp,
+	xfs_fsblock_t		fsbno)
+{
+	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+
+	if (agno >= mp->m_sb.sb_agcount)
+		return false;
+	return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
+}
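
These verifiers establish the pattern the new scrub code relies on:
sanity-check a block pointer read from disk before following it. A
hedged sketch of a typical call site (the surrounding variables are
illustrative, not taken from this commit):

	/* Reject a corrupt on-disk block pointer before chasing it. */
	bno = be64_to_cpu(*pp);
	if (!xfs_verify_fsbno(mp, bno))
		return -EFSCORRUPTED;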

+ 4 - 0
fs/xfs/libxfs/xfs_alloc.h

@@ -232,5 +232,9 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
 		xfs_alloc_query_range_fn fn, void *priv);
 int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
 		void *priv);
+xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno);
+bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agblock_t agbno);
+bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
 
 #endif	/* __XFS_ALLOC_H__ */

+ 1 - 5
fs/xfs/libxfs/xfs_attr_leaf.c

@@ -397,13 +397,9 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
 	/* rounded down */
 	offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
 
-	switch (dp->i_d.di_format) {
-	case XFS_DINODE_FMT_DEV:
+	if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) {
 		minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
 		return (offset >= minforkoff) ? minforkoff : 0;
-	case XFS_DINODE_FMT_UUID:
-		minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
-		return (offset >= minforkoff) ? minforkoff : 0;
 	}
 
 	/*

+ 815 - 1246
fs/xfs/libxfs/xfs_bmap.c

@@ -38,6 +38,7 @@
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_rtalloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
@@ -112,28 +113,21 @@ xfs_bmap_compute_maxlevels(
 STATIC int				/* error */
 xfs_bmbt_lookup_eq(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
+	struct xfs_bmbt_irec	*irec,
 	int			*stat)	/* success/failure */
 {
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
+	cur->bc_rec.b = *irec;
 	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 }
 
 STATIC int				/* error */
-xfs_bmbt_lookup_ge(
+xfs_bmbt_lookup_first(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
 	int			*stat)	/* success/failure */
 {
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
+	cur->bc_rec.b.br_startoff = 0;
+	cur->bc_rec.b.br_startblock = 0;
+	cur->bc_rec.b.br_blockcount = 0;
 	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
@@ -160,21 +154,17 @@ static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 }
 
 /*
- * Update the record referred to by cur to the value given
- * by [off, bno, len, state].
+ * Update the record referred to by cur to the value given by irec
  * This either works (return 0) or gets an EFSCORRUPTED error.
  */
 STATIC int
 xfs_bmbt_update(
 	struct xfs_btree_cur	*cur,
-	xfs_fileoff_t		off,
-	xfs_fsblock_t		bno,
-	xfs_filblks_t		len,
-	xfs_exntst_t		state)
+	struct xfs_bmbt_irec	*irec)
 {
 	union xfs_btree_rec	rec;
 
-	xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 	return xfs_btree_update(cur, &rec);
 }
 
@@ -242,7 +232,6 @@ xfs_bmap_forkoff_reset(
 {
 	if (whichfork == XFS_ATTR_FORK &&
 	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
-	    ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
 	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
 		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 
@@ -498,31 +487,6 @@ error_norelse:
 	return;
 }
 
-/*
- * Add bmap trace insert entries for all the contents of the extent records.
- */
-void
-xfs_bmap_trace_exlist(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	cnt,		/* count of entries in the list */
-	int		whichfork,	/* data or attr or cow fork */
-	unsigned long	caller_ip)
-{
-	xfs_extnum_t	idx;		/* extent record index */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
-	int		state = 0;
-
-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	else if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(cnt == xfs_iext_count(ifp));
-	for (idx = 0; idx < cnt; idx++)
-		trace_xfs_extlist(ip, idx, state, caller_ip);
-}
-
 /*
  * Validate that the bmbt_irecs being returned from bmapi are valid
  * given the caller's original parameters.  Specifically check the
@@ -657,8 +621,8 @@ xfs_bmap_btree_to_extents(
 	cbno = be64_to_cpu(*pp);
 	*logflagsp = 0;
 #ifdef DEBUG
-	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
-		return error;
+	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+			xfs_btree_check_lptr(cur, cbno, 1));
 #endif
 	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
 				&xfs_bmbt_buf_ops);
@@ -703,14 +667,14 @@ xfs_bmap_extents_to_btree(
 	xfs_bmbt_rec_t		*arp;		/* child record pointer */
 	struct xfs_btree_block	*block;		/* btree root block */
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
-	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
 	int			error;		/* error return value */
-	xfs_extnum_t		i, cnt;		/* extent record index */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_bmbt_key_t		*kp;		/* root block key pointer */
 	xfs_mount_t		*mp;		/* mount structure */
-	xfs_extnum_t		nextents;	/* number of file extents */
 	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	rec;
+	xfs_extnum_t		cnt = 0;
 
 	mp = ip->i_mount;
 	ASSERT(whichfork != XFS_COW_FORK);
@@ -789,15 +753,12 @@ xfs_bmap_extents_to_btree(
 				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 				XFS_BTREE_LONG_PTRS);
 
-	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
-	nextents =  xfs_iext_count(ifp);
-	for (cnt = i = 0; i < nextents; i++) {
-		ep = xfs_iext_get_ext(ifp, i);
-		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
-			arp->l0 = cpu_to_be64(ep->l0);
-			arp->l1 = cpu_to_be64(ep->l1);
-			arp++; cnt++;
-		}
+	for_each_xfs_iext(ifp, &icur, &rec) {
+		if (isnullstartblock(rec.br_startblock))
+			continue;
+		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
+		xfs_bmbt_disk_set_all(arp, &rec);
+		cnt++;
 	}
 	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
 	xfs_btree_set_numrecs(ablock, cnt);
@@ -845,6 +806,8 @@ xfs_bmap_local_to_extents_empty(
 	xfs_bmap_forkoff_reset(ip, whichfork);
 	ifp->if_flags &= ~XFS_IFINLINE;
 	ifp->if_flags |= XFS_IFEXTENTS;
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 }
 
@@ -868,6 +831,7 @@ xfs_bmap_local_to_extents(
 	xfs_alloc_arg_t	args;		/* allocation arguments */
 	xfs_buf_t	*bp;		/* buffer for extent block */
 	struct xfs_bmbt_irec rec;
+	struct xfs_iext_cursor icur;
 
 	/*
 	 * We don't want to deal with the case of keeping inode data inline yet.
@@ -885,8 +849,7 @@ xfs_bmap_local_to_extents(
 
 	flags = 0;
 	error = 0;
-	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
-								XFS_IFINLINE);
+	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = ip->i_mount;
@@ -930,15 +893,16 @@ xfs_bmap_local_to_extents(
 	xfs_bmap_local_to_extents_empty(ip, whichfork);
 	flags |= XFS_ILOG_CORE;
 
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
+
 	rec.br_startoff = 0;
 	rec.br_startblock = args.fsbno;
 	rec.br_blockcount = 1;
 	rec.br_state = XFS_EXT_NORM;
-	xfs_iext_insert(ip, 0, 1, &rec, 0);
+	xfs_iext_first(ifp, &icur);
+	xfs_iext_insert(ip, &icur, &rec, 0);
 
-	trace_xfs_bmap_post_update(ip, 0,
-			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
-			_THIS_IP_);
 	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 	ip->i_d.di_nblocks = 1;
 	xfs_trans_mod_dquot_byino(tp, ip,
@@ -973,7 +937,8 @@ xfs_bmap_add_attrfork_btree(
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 		cur->bc_private.b.dfops = dfops;
 		cur->bc_private.b.firstblock = *firstblock;
-		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
+		error = xfs_bmbt_lookup_first(cur, &stat);
+		if (error)
 			goto error0;
 		/* must be at least one entry */
 		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
@@ -1124,9 +1089,6 @@ xfs_bmap_add_attrfork(
 	case XFS_DINODE_FMT_DEV:
 		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
 		break;
-	case XFS_DINODE_FMT_UUID:
-		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
-		break;
 	case XFS_DINODE_FMT_LOCAL:
 	case XFS_DINODE_FMT_EXTENTS:
 	case XFS_DINODE_FMT_BTREE:
@@ -1206,32 +1168,35 @@ trans_cancel:
  */
 
 /*
- * Read in the extents to if_extents.
- * All inode fields are set up by caller, we just traverse the btree
- * and copy the records in. If the file system cannot contain unwritten
- * extents, the records are checked for no "state" flags.
+ * Read in extents from a btree-format inode.
  */
-int					/* error */
-xfs_bmap_read_extents(
-	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_inode_t		*ip,	/* incore inode */
-	int			whichfork) /* data or attr fork */
+int
+xfs_iread_extents(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			whichfork)
 {
-	struct xfs_btree_block	*block;	/* current btree block */
-	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_buf_t		*bp;	/* buffer for "block" */
-	int			error;	/* error return value */
-	xfs_extnum_t		i, j;	/* index into the extents list */
-	xfs_ifork_t		*ifp;	/* fork structure */
-	int			level;	/* btree level, for checking */
-	xfs_mount_t		*mp;	/* file system mount structure */
-	__be64			*pp;	/* pointer to block address */
-	/* REFERENCED */
-	xfs_extnum_t		room;	/* number of entries there's room for */
+	struct xfs_mount	*mp = ip->i_mount;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
+	struct xfs_btree_block	*block = ifp->if_broot;
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	new;
+	xfs_fsblock_t		bno;
+	struct xfs_buf		*bp;
+	xfs_extnum_t		i, j;
+	int			level;
+	__be64			*pp;
+	int			error;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
 
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	block = ifp->if_broot;
 	/*
 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 	 */
@@ -1248,21 +1213,23 @@ xfs_bmap_read_extents(
 		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 		if (error)
-			return error;
+			goto out;
 		block = XFS_BUF_TO_BLOCK(bp);
 		if (level == 0)
 			break;
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
 		XFS_WANT_CORRUPTED_GOTO(mp,
-			XFS_FSB_SANITY_CHECK(mp, bno), error0);
+			XFS_FSB_SANITY_CHECK(mp, bno), out_brelse);
 		xfs_trans_brelse(tp, bp);
 	}
+
 	/*
 	 * Here with bp and block set to the leftmost leaf node in the tree.
 	 */
-	room = xfs_iext_count(ifp);
 	i = 0;
+	xfs_iext_first(ifp, &icur);
+
 	/*
 	 * Loop over all leaf nodes.  Copy information to the extent records.
 	 */
@@ -1272,14 +1239,15 @@ xfs_bmap_read_extents(
 		xfs_extnum_t	num_recs;
 
 		num_recs = xfs_btree_get_numrecs(block);
-		if (unlikely(i + num_recs > room)) {
-			ASSERT(i + num_recs <= room);
+		if (unlikely(i + num_recs > nextents)) {
+			ASSERT(i + num_recs <= nextents);
 			xfs_warn(ip->i_mount,
 				"corrupt dinode %Lu, (btree extents).",
 				(unsigned long long) ip->i_ino);
-			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
+			XFS_CORRUPTION_ERROR(__func__,
 				XFS_ERRLEVEL_LOW, ip->i_mount, block);
-			goto error0;
+			error = -EFSCORRUPTED;
+			goto out_brelse;
 		}
 		/*
 		 * Read-ahead the next leaf block, if any.
@@ -1292,15 +1260,17 @@ xfs_bmap_read_extents(
 		 * Copy records into the extent records.
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-		for (j = 0; j < num_recs; j++, i++, frp++) {
-			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
-			trp->l0 = be64_to_cpu(frp->l0);
-			trp->l1 = be64_to_cpu(frp->l1);
-			if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
+		for (j = 0; j < num_recs; j++, frp++, i++) {
+			xfs_bmbt_disk_get_all(frp, &new);
+			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
 				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
 						 XFS_ERRLEVEL_LOW, mp);
-				goto error0;
+				error = -EFSCORRUPTED;
+				goto out_brelse;
 			}
+			xfs_iext_insert(ip, &icur, &new, state);
+			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
+			xfs_iext_next(ifp, &icur);
 		}
 		xfs_trans_brelse(tp, bp);
 		bno = nextbno;
@@ -1312,71 +1282,74 @@ xfs_bmap_read_extents(
 		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 		if (error)
-			return error;
+			goto out;
 		block = XFS_BUF_TO_BLOCK(bp);
 	}
-	if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
-		return -EFSCORRUPTED;
+
+	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
+		error = -EFSCORRUPTED;
+		goto out;
+	}
 	ASSERT(i == xfs_iext_count(ifp));
-	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+
+	ifp->if_flags |= XFS_IFEXTENTS;
 	return 0;
-error0:
+
+out_brelse:
 	xfs_trans_brelse(tp, bp);
-	return -EFSCORRUPTED;
+out:
+	xfs_iext_destroy(ifp);
+	return error;
 }
 
 /*
- * Returns the file-relative block number of the first unused block(s)
- * in the file with at least "len" logically contiguous blocks free.
- * This is the lowest-address hole if the file has holes, else the first block
- * past the end of file.
- * Return 0 if the file is currently local (in-inode).
+ * Returns the relative block number of the first unused block(s) in the given
+ * fork with at least "len" logically contiguous blocks free.  This is the
+ * lowest-address hole if the fork has holes, else the first block past the end
+ * of fork.  Return 0 if the fork is currently local (in-inode).
  */
 int						/* error */
 xfs_bmap_first_unused(
-	xfs_trans_t	*tp,			/* transaction pointer */
-	xfs_inode_t	*ip,			/* incore inode */
-	xfs_extlen_t	len,			/* size of hole to find */
-	xfs_fileoff_t	*first_unused,		/* unused block */
-	int		whichfork)		/* data or attr fork */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_extlen_t		len,		/* size of hole to find */
+	xfs_fileoff_t		*first_unused,	/* unused block */
+	int			whichfork)	/* data or attr fork */
 {
-	int		error;			/* error return value */
-	int		idx;			/* extent record index */
-	xfs_ifork_t	*ifp;			/* inode fork pointer */
-	xfs_fileoff_t	lastaddr;		/* last block number seen */
-	xfs_fileoff_t	lowest;			/* lowest useful block */
-	xfs_fileoff_t	max;			/* starting useful block */
-	xfs_extnum_t	nextents;		/* number of extent entries */
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmbt_irec	got;
+	struct xfs_iext_cursor	icur;
+	xfs_fileoff_t		lastaddr = 0;
+	xfs_fileoff_t		lowest, max;
+	int			error;
 
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 		*first_unused = 0;
 		return 0;
 	}
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-	    (error = xfs_iread_extents(tp, ip, whichfork)))
-		return error;
-	lowest = *first_unused;
-	nextents = xfs_iext_count(ifp);
-	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
-		struct xfs_bmbt_irec got;
 
-		xfs_iext_get_extent(ifp, idx, &got);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
 
+	lowest = max = *first_unused;
+	for_each_xfs_iext(ifp, &icur, &got) {
 		/*
 		 * See if the hole before this extent will work.
 		 */
 		if (got.br_startoff >= lowest + len &&
-		    got.br_startoff - max >= len) {
-			*first_unused = max;
-			return 0;
-		}
+		    got.br_startoff - max >= len)
+			break;
 		lastaddr = got.br_startoff + got.br_blockcount;
 		max = XFS_FILEOFF_MAX(lastaddr, lowest);
 	}
+
 	*first_unused = max;
 	return 0;
 }
@@ -1396,7 +1369,7 @@ xfs_bmap_last_before(
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			error;
 
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
@@ -1416,17 +1389,8 @@ xfs_bmap_last_before(
 			return error;
 	}
 
-	if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
-		if (got.br_startoff <= *last_block - 1)
-			return 0;
-	}
-
-	if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
-		*last_block = got.br_startoff + got.br_blockcount;
-		return 0;
-	}
-
-	*last_block = 0;
+	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
+		*last_block = 0;
 	return 0;
 }
 
@@ -1439,8 +1403,8 @@ xfs_bmap_last_extent(
 	int			*is_empty)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_iext_cursor	icur;
 	int			error;
-	int			nextents;
 
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(tp, ip, whichfork);
@@ -1448,14 +1412,11 @@ xfs_bmap_last_extent(
 			return error;
 	}
 
-	nextents = xfs_iext_count(ifp);
-	if (nextents == 0) {
+	xfs_iext_last(ifp, &icur);
+	if (!xfs_iext_get_extent(ifp, &icur, rec))
 		*is_empty = 1;
-		return 0;
-	}
-
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
-	*is_empty = 0;
+	else
+		*is_empty = 0;
 	return 0;
 }
 
@@ -1540,10 +1501,10 @@ xfs_bmap_one_block(
 	xfs_inode_t	*ip,		/* incore inode */
 	int		whichfork)	/* data or attr fork */
 {
-	xfs_bmbt_rec_host_t *ep;	/* ptr to fork's extent */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	int		rval;		/* return value */
 	xfs_bmbt_irec_t	s;		/* internal version of extent */
+	struct xfs_iext_cursor icur;
 
 #ifndef DEBUG
 	if (whichfork == XFS_DATA_FORK)
@@ -1555,8 +1516,8 @@ xfs_bmap_one_block(
 		return 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ep = xfs_iext_get_ext(ifp, 0);
-	xfs_bmbt_get_all(ep, &s);
+	xfs_iext_first(ifp, &icur);
+	xfs_iext_get_extent(ifp, &icur, &s);
 	rval = s.br_startoff == 0 && s.br_blockcount == 1;
 	if (rval && whichfork == XFS_DATA_FORK)
 		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
@@ -1576,8 +1537,6 @@ xfs_bmap_add_extent_delay_real(
 	int			whichfork)
 {
 	struct xfs_bmbt_irec	*new = &bma->got;
-	int			diff;	/* temp value */
-	xfs_bmbt_rec_host_t	*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -1585,14 +1544,14 @@ xfs_bmap_add_extent_delay_real(
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
-	int			state = 0;/* state bits, accessed thru macros */
+	int			state = xfs_bmap_fork_to_state(whichfork);
 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
 	xfs_filblks_t		da_old; /* old count del alloc blocks used */
 	xfs_filblks_t		temp=0;	/* value for da_new calculations */
-	xfs_filblks_t		temp2=0;/* value for da_new calculations */
 	int			tmp_rval;	/* partial logging flags */
 	struct xfs_mount	*mp;
 	xfs_extnum_t		*nextents;
+	struct xfs_bmbt_irec	old;
 
 	mp = bma->ip->i_mount;
 	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
@@ -1600,8 +1559,6 @@ xfs_bmap_add_extent_delay_real(
 	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
 						&bma->ip->i_d.di_nextents);
 
-	ASSERT(bma->idx >= 0);
-	ASSERT(bma->idx <= xfs_iext_count(ifp));
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -1612,15 +1569,12 @@ xfs_bmap_add_extent_delay_real(
 #define	RIGHT		r[1]
 #define	PREV		r[2]
 
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	/*
 	 * Set up a bunch of variables to make the tests simpler.
 	 */
-	ep = xfs_iext_get_ext(ifp, bma->idx);
-	xfs_bmbt_get_all(ep, &PREV);
+	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
 	new_endoff = new->br_startoff + new->br_blockcount;
+	ASSERT(isnullstartblock(PREV.br_startblock));
 	ASSERT(PREV.br_startoff <= new->br_startoff);
 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
 
@@ -1640,10 +1594,8 @@ xfs_bmap_add_extent_delay_real(
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (bma->idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
-
 		if (isnullstartblock(LEFT.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -1660,10 +1612,8 @@ xfs_bmap_add_extent_delay_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (bma->idx < xfs_iext_count(ifp) - 1) {
+	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
-
 		if (isnullstartblock(RIGHT.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -1693,22 +1643,19 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		bma->idx--;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-			LEFT.br_blockcount + PREV.br_blockcount +
-			RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-		xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
+		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
+
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
 		(*nextents)--;
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -1720,11 +1667,7 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock,
-					LEFT.br_blockcount +
-					PREV.br_blockcount +
-					RIGHT.br_blockcount, LEFT.br_state);
+			error = xfs_bmbt_update(bma->cur, &LEFT);
 			if (error)
 				goto done;
 		}
@@ -1735,28 +1678,22 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		bma->idx--;
+		old = LEFT;
+		LEFT.br_blockcount += PREV.br_blockcount;
 
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-			LEFT.br_blockcount + PREV.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
 
-		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock, LEFT.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock,
-					LEFT.br_blockcount +
-					PREV.br_blockcount, LEFT.br_state);
+			error = xfs_bmbt_update(bma->cur, &LEFT);
 			if (error)
 				goto done;
 		}
@@ -1767,27 +1704,23 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount + RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		PREV.br_startblock = new->br_startblock;
+		PREV.br_blockcount += RIGHT.br_blockcount;
+
+		xfs_iext_next(ifp, &bma->icur);
+		xfs_iext_remove(bma->ip, &bma->icur, state);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
 
-		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
-					new->br_startblock,
-					PREV.br_blockcount +
-					RIGHT.br_blockcount, PREV.br_state);
+			error = xfs_bmbt_update(bma->cur, &PREV);
 			if (error)
 				goto done;
 		}
@@ -1799,23 +1732,19 @@ xfs_bmap_add_extent_delay_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmbt_set_state(ep, new->br_state);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+		PREV.br_startblock = new->br_startblock;
+		PREV.br_state = new->br_state;
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
 
 		(*nextents)++;
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
@@ -1828,40 +1757,33 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
-			LEFT.br_blockcount + new->br_blockcount);
-		xfs_bmbt_set_startoff(ep,
-			PREV.br_startoff + new->br_blockcount);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
-
+		old = LEFT;
 		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
+		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
+				startblockval(PREV.br_startblock));
+
+		LEFT.br_blockcount += new->br_blockcount;
+
+		PREV.br_blockcount = temp;
+		PREV.br_startoff += new->br_blockcount;
+		PREV.br_startblock = nullstartblock(da_new);
+
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
+		xfs_iext_prev(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock, LEFT.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-					LEFT.br_startblock,
-					LEFT.br_blockcount +
-					new->br_blockcount,
-					LEFT.br_state);
+			error = xfs_bmbt_update(bma->cur, &LEFT);
 			if (error)
 				goto done;
 		}
-		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
-			startblockval(PREV.br_startblock));
-		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-		bma->idx--;
 		break;
 
 	case BMAP_LEFT_FILLING:
@@ -1869,23 +1791,16 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startoff(ep, new_endoff);
-		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
 		(*nextents)++;
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
@@ -1900,12 +1815,18 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+
+		temp = PREV.br_blockcount - new->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
 			startblockval(PREV.br_startblock) -
 			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
-		ep = xfs_iext_get_ext(ifp, bma->idx + 1);
-		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+
+		PREV.br_startoff = new_endoff;
+		PREV.br_blockcount = temp;
+		PREV.br_startblock = nullstartblock(da_new);
+		xfs_iext_next(ifp, &bma->icur);
+		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
+		xfs_iext_prev(ifp, &bma->icur);
 		break;
 
 	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1913,40 +1834,34 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the last part of a previous delayed allocation.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
-			new->br_startoff, new->br_startblock,
-			new->br_blockcount + RIGHT.br_blockcount,
-			RIGHT.br_state);
-		trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+		old = RIGHT;
+		RIGHT.br_startoff = new->br_startoff;
+		RIGHT.br_startblock = new->br_startblock;
+		RIGHT.br_blockcount += new->br_blockcount;
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(bma->cur, new->br_startoff,
-					new->br_startblock,
-					new->br_blockcount +
-					RIGHT.br_blockcount,
-					RIGHT.br_state);
+			error = xfs_bmbt_update(bma->cur, &RIGHT);
 			if (error)
 				goto done;
 		}
 
+		temp = PREV.br_blockcount - new->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
 			startblockval(PREV.br_startblock));
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
-		bma->idx++;
+		PREV.br_blockcount = temp;
+		PREV.br_startblock = nullstartblock(da_new);
+
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
+		xfs_iext_next(ifp, &bma->icur);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
 		break;
 
 	case BMAP_RIGHT_FILLING:
@@ -1954,22 +1869,16 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the last part of a previous delayed allocation.
 		 * The right neighbor is not contiguous.
 		 */
-		temp = PREV.br_blockcount - new->br_blockcount;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
 		(*nextents)++;
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
@@ -1984,14 +1893,16 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+
+		temp = PREV.br_blockcount - new->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
 			startblockval(PREV.br_startblock) -
 			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
-		ep = xfs_iext_get_ext(ifp, bma->idx);
-		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
-		bma->idx++;
+		PREV.br_startblock = nullstartblock(da_new);
+		PREV.br_blockcount = temp;
+		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
+		xfs_iext_next(ifp, &bma->icur);
 		break;
 
 	case 0:
@@ -2015,30 +1926,40 @@ xfs_bmap_add_extent_delay_real(
 		 *  PREV @ idx          LEFT              RIGHT
 		 *                      inserted at idx + 1
 		 */
-		temp = new->br_startoff - PREV.br_startoff;
-		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);	/* truncate PREV */
+		old = PREV;
+
+		/* LEFT is the new middle */
 		LEFT = *new;
+
+		/* RIGHT is the new right */
 		RIGHT.br_state = PREV.br_state;
-		RIGHT.br_startblock = nullstartblock(
-				(int)xfs_bmap_worst_indlen(bma->ip, temp2));
 		RIGHT.br_startoff = new_endoff;
-		RIGHT.br_blockcount = temp2;
-		/* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
-		xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
+		RIGHT.br_blockcount =
+			PREV.br_startoff + PREV.br_blockcount - new_endoff;
+		RIGHT.br_startblock =
+			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
+					RIGHT.br_blockcount));
+
+		/* truncate PREV */
+		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
+		PREV.br_startblock =
+			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
+					PREV.br_blockcount));
+		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
+
+		xfs_iext_next(ifp, &bma->icur);
+		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
+		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
 		(*nextents)++;
+
 		if (bma->cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
@@ -2053,30 +1974,9 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
-		temp = xfs_bmap_worst_indlen(bma->ip, temp);
-		temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
-		diff = (int)(temp + temp2 -
-			     (startblockval(PREV.br_startblock) -
-			      (bma->cur ?
-			       bma->cur->bc_private.b.allocated : 0)));
-		if (diff > 0) {
-			error = xfs_mod_fdblocks(bma->ip->i_mount,
-						 -((int64_t)diff), false);
-			ASSERT(!error);
-			if (error)
-				goto done;
-		}
-
-		ep = xfs_iext_get_ext(ifp, bma->idx);
-		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-		trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
-			nullstartblock((int)temp2));
-		trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
 
-		bma->idx++;
-		da_new = temp + temp2;
+		da_new = startblockval(PREV.br_startblock) +
+			 startblockval(RIGHT.br_startblock);
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -2110,19 +2010,17 @@ xfs_bmap_add_extent_delay_real(
 			goto done;
 	}
 
-	/* adjust for changes in reserved delayed indirect blocks */
-	if (da_old || da_new) {
-		temp = da_new;
-		if (bma->cur)
-			temp += bma->cur->bc_private.b.allocated;
-		if (temp < da_old)
-			xfs_mod_fdblocks(bma->ip->i_mount,
-					(int64_t)(da_old - temp), false);
+	if (bma->cur) {
+		da_new += bma->cur->bc_private.b.allocated;
+		bma->cur->bc_private.b.allocated = 0;
 	}
 
-	/* clear out the allocated field, done with it now in any case. */
-	if (bma->cur)
-		bma->cur->bc_private.b.allocated = 0;
+	/* adjust for changes in reserved delayed indirect blocks */
+	if (da_new != da_old) {
+		ASSERT(state == 0 || da_new < da_old);
+		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
+				false);
+	}
 
 	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
 done:
@@ -2142,7 +2040,7 @@ xfs_bmap_add_extent_unwritten_real(
 	struct xfs_trans	*tp,
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	int			whichfork,
-	xfs_extnum_t		*idx,	/* extent number to update/insert */
+	struct xfs_iext_cursor	*icur,
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
@@ -2150,28 +2048,22 @@ xfs_bmap_add_extent_unwritten_real(
 	int			*logflagsp) /* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
-	xfs_bmbt_rec_host_t	*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
-	xfs_exntst_t		newext;	/* new extent state */
-	xfs_exntst_t		oldext;	/* old extent state */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
-	int			state = 0;/* state bits, accessed thru macros */
+	int			state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	old;
 
 	*logflagsp = 0;
 
 	cur = *curp;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
 
-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(!isnullstartblock(new->br_startblock));
 
 	XFS_STATS_INC(mp, xs_add_exlist);
@@ -2184,12 +2076,8 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Set up a bunch of variables to make the tests simpler.
 	 */
 	error = 0;
-	ep = xfs_iext_get_ext(ifp, *idx);
-	xfs_bmbt_get_all(ep, &PREV);
-	newext = new->br_state;
-	oldext = (newext == XFS_EXT_UNWRITTEN) ?
-		XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
-	ASSERT(PREV.br_state == oldext);
+	xfs_iext_get_extent(ifp, icur, &PREV);
+	ASSERT(new->br_state != PREV.br_state);
 	new_endoff = new->br_startoff + new->br_blockcount;
 	ASSERT(PREV.br_startoff <= new->br_startoff);
 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
@@ -2207,10 +2095,8 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (*idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
-
 		if (isnullstartblock(LEFT.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -2218,7 +2104,7 @@ xfs_bmap_add_extent_unwritten_real(
 	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
 	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
 	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
-	    LEFT.br_state == newext &&
+	    LEFT.br_state == new->br_state &&
 	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
 		state |= BMAP_LEFT_CONTIG;
 
@@ -2227,9 +2113,8 @@ xfs_bmap_add_extent_unwritten_real(
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (*idx < xfs_iext_count(ifp) - 1) {
+	if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
 		if (isnullstartblock(RIGHT.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -2237,7 +2122,7 @@ xfs_bmap_add_extent_unwritten_real(
 	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
 	    new_endoff == RIGHT.br_startoff &&
 	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
-	    newext == RIGHT.br_state &&
+	    new->br_state == RIGHT.br_state &&
 	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
 	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
 		       BMAP_RIGHT_FILLING)) !=
@@ -2258,24 +2143,20 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		--*idx;
-
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			LEFT.br_blockcount + PREV.br_blockcount +
-			RIGHT.br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
 
-		xfs_iext_remove(ip, *idx + 1, 2, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &LEFT);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i)))
+			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			if ((error = xfs_btree_delete(cur, &i)))
@@ -2290,10 +2171,8 @@ xfs_bmap_add_extent_unwritten_real(
 			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
-				LEFT.br_startblock,
-				LEFT.br_blockcount + PREV.br_blockcount +
-				RIGHT.br_blockcount, LEFT.br_state)))
+			error = xfs_bmbt_update(cur, &LEFT);
+			if (error)
 				goto done;
 		}
 		break;
@@ -2303,23 +2182,19 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		--*idx;
+		LEFT.br_blockcount += PREV.br_blockcount;
 
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			LEFT.br_blockcount + PREV.br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &LEFT);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock, PREV.br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			if ((error = xfs_btree_delete(cur, &i)))
@@ -2328,10 +2203,8 @@ xfs_bmap_add_extent_unwritten_real(
 			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
-				LEFT.br_startblock,
-				LEFT.br_blockcount + PREV.br_blockcount,
-				LEFT.br_state)))
+			error = xfs_bmbt_update(cur, &LEFT);
+			if (error)
 				goto done;
 		}
 		break;
@@ -2341,21 +2214,22 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount + RIGHT.br_blockcount);
-		xfs_bmbt_set_state(ep, newext);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		PREV.br_blockcount += RIGHT.br_blockcount;
+		PREV.br_state = new->br_state;
+
+		xfs_iext_next(ifp, icur);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
-					RIGHT.br_startblock,
-					RIGHT.br_blockcount, &i)))
+			error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			if ((error = xfs_btree_delete(cur, &i)))
@@ -2364,10 +2238,8 @@ xfs_bmap_add_extent_unwritten_real(
 			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, new->br_startoff,
-				new->br_startblock,
-				new->br_blockcount + RIGHT.br_blockcount,
-				newext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
 		}
 		break;
@@ -2378,22 +2250,19 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_state(ep, newext);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		PREV.br_state = new->br_state;
+		xfs_iext_update_extent(ip, state, icur, &PREV);
 
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, new, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, new->br_startoff,
-				new->br_startblock, new->br_blockcount,
-				newext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
 		}
 		break;
@@ -2403,43 +2272,32 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
-			LEFT.br_blockcount + new->br_blockcount);
-		xfs_bmbt_set_startoff(ep,
-			PREV.br_startoff + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
-
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_startblock(ep,
-			new->br_startblock + new->br_blockcount);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-		--*idx;
+		LEFT.br_blockcount += new->br_blockcount;
+
+		old = PREV;
+		PREV.br_startoff += new->br_blockcount;
+		PREV.br_startblock += new->br_blockcount;
+		PREV.br_blockcount -= new->br_blockcount;
+
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &LEFT);
 
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock, PREV.br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur,
-				PREV.br_startoff + new->br_blockcount,
-				PREV.br_startblock + new->br_blockcount,
-				PREV.br_blockcount - new->br_blockcount,
-				oldext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
-			if ((error = xfs_btree_decrement(cur, 0, &i)))
+			error = xfs_btree_decrement(cur, 0, &i);
+			if (error)
 				goto done;
-			error = xfs_bmbt_update(cur, LEFT.br_startoff,
-				LEFT.br_startblock,
-				LEFT.br_blockcount + new->br_blockcount,
-				LEFT.br_state);
+			error = xfs_bmbt_update(cur, &LEFT);
 			if (error)
 				goto done;
 		}
@@ -2450,32 +2308,25 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
-		xfs_bmbt_set_startoff(ep, new_endoff);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmbt_set_startblock(ep,
-			new->br_startblock + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		old = PREV;
+		PREV.br_startoff += new->br_blockcount;
+		PREV.br_startblock += new->br_blockcount;
+		PREV.br_blockcount -= new->br_blockcount;
+
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+		xfs_iext_insert(ip, icur, new, state);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock, PREV.br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur,
-				PREV.br_startoff + new->br_blockcount,
-				PREV.br_startblock + new->br_blockcount,
-				PREV.br_blockcount - new->br_blockcount,
-				oldext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
 			cur->bc_rec.b = *new;
 			if ((error = xfs_btree_insert(cur, &i)))
@@ -2489,39 +2340,33 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = PREV;
+		PREV.br_blockcount -= new->br_blockcount;
 
-		++*idx;
+		RIGHT.br_startoff = new->br_startoff;
+		RIGHT.br_startblock = new->br_startblock;
+		RIGHT.br_blockcount += new->br_blockcount;
 
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-			new->br_startoff, new->br_startblock,
-			new->br_blockcount + RIGHT.br_blockcount, newext);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &RIGHT);
 
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
 			rval = 0;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock,
-					PREV.br_blockcount, &i)))
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
-				PREV.br_startblock,
-				PREV.br_blockcount - new->br_blockcount,
-				oldext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
-			if ((error = xfs_btree_increment(cur, 0, &i)))
+			error = xfs_btree_increment(cur, 0, &i);
+			if (error)
 				goto done;
-			if ((error = xfs_bmbt_update(cur, new->br_startoff,
-				new->br_startblock,
-				new->br_blockcount + RIGHT.br_blockcount,
-				newext)))
+			error = xfs_bmbt_update(cur, &RIGHT);
+			if (error)
 				goto done;
 		}
 		break;
@@ -2531,13 +2376,12 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is not contiguous.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep,
-			PREV.br_blockcount - new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = PREV;
+		PREV.br_blockcount -= new->br_blockcount;
 
-		++*idx;
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, new, state);
 
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2545,22 +2389,17 @@ xfs_bmap_add_extent_unwritten_real(
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock, PREV.br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
-				PREV.br_startblock,
-				PREV.br_blockcount - new->br_blockcount,
-				oldext)))
+			error = xfs_bmbt_update(cur, &PREV);
+			if (error)
 				goto done;
-			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, new, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2573,20 +2412,20 @@ xfs_bmap_add_extent_unwritten_real(
 		 * newext.  Contiguity is impossible here.
 		 * One extent becomes three extents.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep,
-			new->br_startoff - PREV.br_startoff);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = PREV;
+		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
 
 		r[0] = *new;
 		r[1].br_startoff = new_endoff;
 		r[1].br_blockcount =
-			PREV.br_startoff + PREV.br_blockcount - new_endoff;
+			old.br_startoff + old.br_blockcount - new_endoff;
 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
-		r[1].br_state = oldext;
+		r[1].br_state = PREV.br_state;
 
-		++*idx;
-		xfs_iext_insert(ip, *idx, 2, &r[0], state);
+		xfs_iext_update_extent(ip, state, icur, &PREV);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &r[1], state);
+		xfs_iext_insert(ip, icur, &r[0], state);
 
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 				XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
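
In the one-becomes-three case above, the head of the old extent keeps its state, *new becomes the middle piece, and r[1] is the tail computed from the pre-split record. A worked example with hypothetical numbers:

	/*
	 * Hypothetical: 'old' spans file offsets 100..199 in oldext
	 * state, and *new converts offsets 140..159 to newext.
	 */
	PREV.br_blockcount = 140 - 100;			/* head: 100..139 */
	r[0] = *new;					/* middle: 140..159 */
	r[1].br_startoff   = 160;			/* new_endoff */
	r[1].br_blockcount = (100 + 100) - 160;		/* tail: 40 blocks */
	r[1].br_startblock = new->br_startblock + new->br_blockcount;
	r[1].br_state      = PREV.br_state;		/* still oldext */
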
@@ -2594,20 +2433,16 @@ xfs_bmap_add_extent_unwritten_real(
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
 		else {
 			rval = XFS_ILOG_CORE;
-			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-					PREV.br_startblock, PREV.br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			/* new right extent - oldext */
-			if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
-				r[1].br_startblock, r[1].br_blockcount,
-				r[1].br_state)))
+			error = xfs_bmbt_update(cur, &r[1]);
+			if (error)
 				goto done;
 			/* new left extent - oldext */
 			cur->bc_rec.b = PREV;
-			cur->bc_rec.b.br_blockcount =
-				new->br_startoff - PREV.br_startoff;
 			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2616,13 +2451,11 @@ xfs_bmap_add_extent_unwritten_real(
 			 * we are about to insert as we can't trust it after
 			 * the previous insert.
 			 */
-			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-					new->br_startblock, new->br_blockcount,
-					&i)))
+			error = xfs_bmbt_lookup_eq(cur, new, &i);
+			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			/* new middle extent - newext */
-			cur->bc_rec.b.br_state = new->br_state;
 			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2681,7 +2514,7 @@ STATIC void
 xfs_bmap_add_extent_hole_delay(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	int			whichfork,
-	xfs_extnum_t		*idx,	/* extent number to update/insert */
+	struct xfs_iext_cursor	*icur,
 	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
 {
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -2689,22 +2522,17 @@ xfs_bmap_add_extent_hole_delay(
 	xfs_filblks_t		newlen=0;	/* new indirect size */
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
-	int			state;  /* state bits, accessed thru macros */
-	xfs_filblks_t		temp=0;	/* temp for indirect calculations */
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	xfs_filblks_t		temp;	 /* temp for indirect calculations */
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	state = 0;
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
 	ASSERT(isnullstartblock(new->br_startblock));
 
 	/*
 	 * Check and set flags if this segment has a left neighbor
 	 */
-	if (*idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
-
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -2713,10 +2541,8 @@ xfs_bmap_add_extent_hole_delay(
 	 * Check and set flags if the current (right) segment exists.
 	 * If it doesn't exist, we're converting the hole at end-of-file.
 	 */
-	if (*idx < xfs_iext_count(ifp)) {
+	if (xfs_iext_get_extent(ifp, icur, &right)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
-
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
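
Several functions in this patch replace the open-coded fork-to-state setup with xfs_bmap_fork_to_state(). Its body is not part of this section; judging from the uses it presumably reduces to something like:

	/* presumed shape of the helper; not shown in this section */
	static inline int xfs_bmap_fork_to_state(int whichfork)
	{
		switch (whichfork) {
		case XFS_ATTR_FORK:
			return BMAP_ATTRFORK;
		case XFS_COW_FORK:
			return BMAP_COWFORK;
		default:
			return 0;
		}
	}
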
@@ -2748,22 +2574,20 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		--*idx;
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
 
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_startblock = nullstartblock(newlen);
+		left.br_blockcount = temp;
 
-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 		break;
 
 	case BMAP_LEFT_CONTIG:
@@ -2772,18 +2596,17 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		--*idx;
 		temp = left.br_blockcount + new->br_blockcount;
 
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-			nullstartblock((int)newlen));
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_blockcount = temp;
+		left.br_startblock = nullstartblock(newlen);
+
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 		break;
 
 	case BMAP_RIGHT_CONTIG:
@@ -2792,16 +2615,15 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 					 oldlen);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-			new->br_startoff,
-			nullstartblock((int)newlen), temp, right.br_state);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		right.br_startoff = new->br_startoff;
+		right.br_startblock = nullstartblock(newlen);
+		right.br_blockcount = temp;
+		xfs_iext_update_extent(ip, state, icur, &right);
 		break;
 
 	case 0:
@@ -2811,7 +2633,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		xfs_iext_insert(ip, icur, new, state);
 		break;
 	}
 	if (oldlen != newlen) {
@@ -2832,7 +2654,7 @@ xfs_bmap_add_extent_hole_real(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_btree_cur	**curp,
 	struct xfs_bmbt_irec	*new,
 	xfs_fsblock_t		*first,
@@ -2847,27 +2669,19 @@ xfs_bmap_add_extent_hole_real(
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
 	int			rval=0;	/* return value (logging flags) */
-	int			state;	/* state bits, accessed thru macros */
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_bmbt_irec	old;
 
-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
-	state = 0;
-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 */
-	if (*idx > 0) {
+	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
 		state |= BMAP_LEFT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
 		if (isnullstartblock(left.br_startblock))
 			state |= BMAP_LEFT_DELAY;
 	}
@@ -2876,9 +2690,8 @@ xfs_bmap_add_extent_hole_real(
 	 * Check and set flags if this segment has a current value.
 	 * Not true if we're inserting into the "hole" at eof.
 	 */
-	if (*idx < xfs_iext_count(ifp)) {
+	if (xfs_iext_get_extent(ifp, icur, &right)) {
 		state |= BMAP_RIGHT_VALID;
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 		if (isnullstartblock(right.br_startblock))
 			state |= BMAP_RIGHT_DELAY;
 	}
@@ -2915,14 +2728,11 @@ xfs_bmap_add_extent_hole_real(
 		 * left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		--*idx;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			left.br_blockcount + new->br_blockcount +
-			right.br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		left.br_blockcount += new->br_blockcount + right.br_blockcount;
 
-		xfs_iext_remove(ip, *idx + 1, 1, state);
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2930,9 +2740,7 @@ xfs_bmap_add_extent_hole_real(
 			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 		} else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
-					right.br_startblock, right.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(cur, &right, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2944,12 +2752,7 @@ xfs_bmap_add_extent_hole_real(
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, left.br_startoff,
-					left.br_startblock,
-					left.br_blockcount +
-						new->br_blockcount +
-						right.br_blockcount,
-					left.br_state);
+			error = xfs_bmbt_update(cur, &left);
 			if (error)
 				goto done;
 		}
@@ -2961,27 +2764,21 @@ xfs_bmap_add_extent_hole_real(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		--*idx;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-			left.br_blockcount + new->br_blockcount);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = left;
+		left.br_blockcount += new->br_blockcount;
+
+		xfs_iext_prev(ifp, icur);
+		xfs_iext_update_extent(ip, state, icur, &left);
 
 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
-					left.br_startblock, left.br_blockcount,
-					&i);
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, left.br_startoff,
-					left.br_startblock,
-					left.br_blockcount +
-						new->br_blockcount,
-					left.br_state);
+			error = xfs_bmbt_update(cur, &left);
 			if (error)
 				goto done;
 		}
@@ -2993,29 +2790,22 @@ xfs_bmap_add_extent_hole_real(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-			new->br_startoff, new->br_startblock,
-			new->br_blockcount + right.br_blockcount,
-			right.br_state);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		old = right;
+
+		right.br_startoff = new->br_startoff;
+		right.br_startblock = new->br_startblock;
+		right.br_blockcount += new->br_blockcount;
+		xfs_iext_update_extent(ip, state, icur, &right);
 
 		if (cur == NULL) {
 			rval = xfs_ilog_fext(whichfork);
 		} else {
 			rval = 0;
-			error = xfs_bmbt_lookup_eq(cur,
-					right.br_startoff,
-					right.br_startblock,
-					right.br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(cur, &old, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			error = xfs_bmbt_update(cur, new->br_startoff,
-					new->br_startblock,
-					new->br_blockcount +
-						right.br_blockcount,
-					right.br_state);
+			error = xfs_bmbt_update(cur, &right);
 			if (error)
 				goto done;
 		}
@@ -3027,21 +2817,17 @@ xfs_bmap_add_extent_hole_real(
 		 * real allocation.
 		 * Insert a new entry.
 		 */
-		xfs_iext_insert(ip, *idx, 1, new, state);
+		xfs_iext_insert(ip, icur, new, state);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 		if (cur == NULL) {
 			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 		} else {
 			rval = XFS_ILOG_CORE;
-			error = xfs_bmbt_lookup_eq(cur,
-					new->br_startoff,
-					new->br_startblock,
-					new->br_blockcount, &i);
+			error = xfs_bmbt_lookup_eq(cur, new, &i);
 			if (error)
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
-			cur->bc_rec.b.br_state = new->br_state;
 			error = xfs_btree_insert(cur, &i);
 			if (error)
 				goto done;
@@ -3981,7 +3767,7 @@ xfs_bmapi_read(
 	struct xfs_bmbt_irec	got;
 	xfs_fileoff_t		obno;
 	xfs_fileoff_t		end;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			error;
 	bool			eof = false;
 	int			n = 0;
@@ -4023,7 +3809,7 @@ xfs_bmapi_read(
 			return error;
 	}
 
-	if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
 		eof = true;
 	end = bno + len;
 	obno = bno;
@@ -4055,7 +3841,7 @@ xfs_bmapi_read(
 			break;
 
 		/* Else go on to the next record. */
-		if (!xfs_iext_get_extent(ifp, ++idx, &got))
+		if (!xfs_iext_next_extent(ifp, &icur, &got))
 			eof = true;
 	}
 	*nmap = n;
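
xfs_bmapi_read() now carries an opaque cursor instead of a raw extent index, giving the iteration idiom below (a sketch with the mapping logic and error handling elided; per the call sites above, xfs_iext_lookup_extent() positions the cursor at the extent covering bno, or the next one after it, and xfs_iext_next_extent() returns false once the fork is exhausted):

	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	bool			found;

	for (found = xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got);
	     found;
	     found = xfs_iext_next_extent(ifp, &icur, &got)) {
		/* map or report 'got' */
	}
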
@@ -4083,7 +3869,7 @@ xfs_bmapi_reserve_delalloc(
 	xfs_filblks_t		len,
 	xfs_filblks_t		prealloc,
 	struct xfs_bmbt_irec	*got,
-	xfs_extnum_t		*lastx,
+	struct xfs_iext_cursor	*icur,
 	int			eof)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -4113,7 +3899,7 @@ xfs_bmapi_reserve_delalloc(
 	if (extsz) {
 		struct xfs_bmbt_irec	prev;
 
-		if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
+		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
 			prev.br_startoff = NULLFILEOFF;
 
 		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
@@ -4162,7 +3948,7 @@ xfs_bmapi_reserve_delalloc(
 	got->br_blockcount = alen;
 	got->br_state = XFS_EXT_NORM;
 
-	xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
+	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
 
 	/*
 	 * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4207,10 +3993,7 @@ xfs_bmapi_allocate(
 	if (bma->wasdel) {
 		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
 		bma->offset = bma->got.br_startoff;
-		if (bma->idx) {
-			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
-					 &bma->prev);
-		}
+		xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
 	} else {
 		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
 		if (!bma->eof)
@@ -4295,7 +4078,7 @@ xfs_bmapi_allocate(
 		error = xfs_bmap_add_extent_delay_real(bma, whichfork);
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
-				whichfork, &bma->idx, &bma->cur, &bma->got,
+				whichfork, &bma->icur, &bma->cur, &bma->got,
 				bma->firstblock, bma->dfops, &bma->logflags);
 
 	bma->logflags |= tmp_logflags;
@@ -4307,7 +4090,7 @@ xfs_bmapi_allocate(
 	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
 	 * the neighbouring ones.
 	 */
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
 
 	ASSERT(bma->got.br_startoff <= bma->offset);
 	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
@@ -4365,8 +4148,8 @@ xfs_bmapi_convert_unwritten(
 	}
 
 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
-			&bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
-			&tmp_logflags);
+			&bma->icur, &bma->cur, mval, bma->firstblock,
+			bma->dfops, &tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
 	 * path because the conversion might not have done so (e.g., if the
@@ -4388,7 +4171,7 @@ xfs_bmapi_convert_unwritten(
 	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
 	 * of the neighbouring ones.
 	 */
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+	xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
 
 	/*
 	 * We may have combined previously unwritten space with written space,
@@ -4507,9 +4290,9 @@ xfs_bmapi_write(
 	end = bno + len;
 	obno = bno;
 
-	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
 		eof = true;
-	if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
+	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
 		bma.prev.br_startoff = NULLFILEOFF;
 	bma.tp = tp;
 	bma.ip = ip;
@@ -4551,7 +4334,8 @@ xfs_bmapi_write(
 		 * First, deal with the hole before the allocated space
 		 * that we found, if any.
 		 */
-		if (need_alloc || wasdelay) {
+		if ((need_alloc || wasdelay) &&
+		    !(flags & XFS_BMAPI_CONVERT_ONLY)) {
 			bma.eof = eof;
 			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
 			bma.wasdel = wasdelay;
@@ -4614,7 +4398,7 @@ xfs_bmapi_write(
 
 		/* Else go on to the next record. */
 		bma.prev = bma.got;
-		if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
+		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
 			eof = true;
 	}
 	*nmap = n;
@@ -4687,7 +4471,7 @@ xfs_bmapi_remap(
 	struct xfs_btree_cur	*cur = NULL;
 	xfs_fsblock_t		firstblock = NULLFSBLOCK;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			logflags = 0, error;
 
 	ASSERT(len > 0);
@@ -4711,7 +4495,7 @@ xfs_bmapi_remap(
 			return error;
 	}
 
-	if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
+	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
 		/* make sure we only reflink into a hole. */
 		ASSERT(got.br_startoff > bno);
 		ASSERT(got.br_startoff - bno >= len);
@@ -4732,8 +4516,8 @@ xfs_bmapi_remap(
 	got.br_blockcount = len;
 	got.br_state = XFS_EXT_NORM;
 
-	error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur,
-			&got, &firstblock, dfops, &logflags);
+	error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur,
+			&cur, &got, &firstblock, dfops, &logflags);
 	if (error)
 		goto error0;
 
@@ -4849,7 +4633,7 @@ int
 xfs_bmap_del_extent_delay(
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_bmbt_irec	*got,
 	struct xfs_bmbt_irec	*del)
 {
@@ -4859,7 +4643,8 @@ xfs_bmap_del_extent_delay(
 	int64_t			da_old, da_new, da_diff = 0;
 	xfs_fileoff_t		del_endoff, got_endoff;
 	xfs_filblks_t		got_indlen, new_indlen, stolen;
-	int			error = 0, state = 0;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	int			error = 0;
 	bool			isrt;
 
 	XFS_STATS_INC(mp, xs_del_exlist);
@@ -4870,8 +4655,6 @@ xfs_bmap_del_extent_delay(
 	da_old = startblockval(got->br_startblock);
 	da_new = 0;
 
-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(del->br_blockcount > 0);
 	ASSERT(got->br_startoff <= del->br_startoff);
 	ASSERT(got_endoff >= del_endoff);
@@ -4895,46 +4678,39 @@ xfs_bmap_del_extent_delay(
 		return error;
 	ip->i_delayed_blks -= del->br_blockcount;
 
-	if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	if (got->br_startoff == del->br_startoff)
-		state |= BMAP_LEFT_CONTIG;
+		state |= BMAP_LEFT_FILLING;
 	if (got_endoff == del_endoff)
-		state |= BMAP_RIGHT_CONTIG;
+		state |= BMAP_RIGHT_FILLING;
 
-	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_iext_remove(ip, *idx, 1, state);
-		--*idx;
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
 		break;
-	case BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_startoff = del_endoff;
 		got->br_blockcount -= del->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
-	case BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Deleting the last part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount = got->br_blockcount - del->br_blockcount;
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
 	case 0:
 		/*
@@ -4946,8 +4722,6 @@ xfs_bmap_del_extent_delay(
 		 * Warn if either of the new indlen reservations is zero as this
 		 * can lead to delalloc problems.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-
 		got->br_blockcount = del->br_startoff - got->br_startoff;
 		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
 
@@ -4959,15 +4733,14 @@ xfs_bmap_del_extent_delay(
 						       del->br_blockcount);
 
 		got->br_startblock = nullstartblock((int)got_indlen);
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
 
 		new.br_startoff = del_endoff;
 		new.br_state = got->br_state;
 		new.br_startblock = nullstartblock((int)new_indlen);
 
-		++*idx;
-		xfs_iext_insert(ip, *idx, 1, &new, state);
+		xfs_iext_update_extent(ip, state, icur, got);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &new, state);
 
 		da_new = got_indlen + new_indlen - stolen;
 		del->br_blockcount -= stolen;
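
The case above splits one delalloc reservation in two: a worst-case indirect-block demand is computed for each remnant, and when the combined demand exceeds the old reservation, blocks are stolen from the freed range to cover the shortfall. A sketch with hypothetical numbers (xfs_bmap_split_indlen() clamps the two shares and returns how many blocks it stole, capped by its last argument):

	/*
	 * Hypothetical: da_old = 8 blocks were reserved, and each
	 * remnant has a worst-case demand of 5, so demand exceeds
	 * the old reservation by 2.
	 */
	got_indlen = 5;
	new_indlen = 5;
	stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
				       del->br_blockcount);
	/* stolen = 2: remnants keep 5 + 5, funded by 8 + 2 */
	da_new = got_indlen + new_indlen - stolen;	/* back to 8 */
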
@@ -4986,7 +4759,7 @@ xfs_bmap_del_extent_delay(
 void
 xfs_bmap_del_extent_cow(
 	struct xfs_inode	*ip,
-	xfs_extnum_t		*idx,
+	struct xfs_iext_cursor	*icur,
 	struct xfs_bmbt_irec	*got,
 	struct xfs_bmbt_irec	*del)
 {
@@ -5001,75 +4774,67 @@ xfs_bmap_del_extent_cow(
 	del_endoff = del->br_startoff + del->br_blockcount;
 	got_endoff = got->br_startoff + got->br_blockcount;
 
-	ASSERT(*idx >= 0);
-	ASSERT(*idx <= xfs_iext_count(ifp));
 	ASSERT(del->br_blockcount > 0);
 	ASSERT(got->br_startoff <= del->br_startoff);
 	ASSERT(got_endoff >= del_endoff);
 	ASSERT(!isnullstartblock(got->br_startblock));
 
 	if (got->br_startoff == del->br_startoff)
-		state |= BMAP_LEFT_CONTIG;
+		state |= BMAP_LEFT_FILLING;
 	if (got_endoff == del_endoff)
-		state |= BMAP_RIGHT_CONTIG;
+		state |= BMAP_RIGHT_FILLING;
 
-	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_iext_remove(ip, *idx, 1, state);
-		--*idx;
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
 		break;
-	case BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_startoff = del_endoff;
 		got->br_blockcount -= del->br_blockcount;
 		got->br_startblock = del->br_startblock + del->br_blockcount;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
-	case BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Deleting the last part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount -= del->br_blockcount;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		xfs_iext_update_extent(ip, state, icur, got);
 		break;
 	case 0:
 		/*
 		 * Deleting the middle of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount = del->br_startoff - got->br_startoff;
-		xfs_iext_update_extent(ifp, *idx, got);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		new.br_startoff = del_endoff;
 		new.br_blockcount = got_endoff - del_endoff;
 		new.br_state = got->br_state;
 		new.br_startblock = del->br_startblock + del->br_blockcount;
 
-		++*idx;
-		xfs_iext_insert(ip, *idx, 1, &new, state);
+		xfs_iext_update_extent(ip, state, icur, got);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &new, state);
 		break;
 	}
 }
 
 /*
  * Called by xfs_bmapi to update file extent records and the btree
- * after removing space (or undoing a delayed allocation).
+ * after removing space.
  */
 STATIC int				/* error */
-xfs_bmap_del_extent(
+xfs_bmap_del_extent_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_trans_t		*tp,	/* current transaction pointer */
-	xfs_extnum_t		*idx,	/* extent number to update/delete */
+	struct xfs_iext_cursor	*icur,
 	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
@@ -5077,16 +4842,12 @@ xfs_bmap_del_extent(
 	int			whichfork, /* data or attr fork */
 	int			bflags)	/* bmapi flags */
 {
-	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
-	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
 	xfs_fsblock_t		del_endblock=0;	/* first block past del */
 	xfs_fileoff_t		del_endoff;	/* first offset past del */
-	int			delay;	/* current block is delayed allocated */
 	int			do_fx;	/* free extent at end of routine */
-	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
 	int			error;	/* error return value */
-	int			flags;	/* inode logging flags */
-	xfs_bmbt_irec_t		got;	/* current extent entry */
+	int			flags = 0;	/* inode logging flags */
+	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
@@ -5095,103 +4856,81 @@ xfs_bmap_del_extent(
 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
 	/* REFERENCED */
 	uint			qfield;	/* quota field to update */
-	xfs_filblks_t		temp;	/* for indirect length calculations */
-	xfs_filblks_t		temp2;	/* for indirect length calculations */
-	int			state = 0;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_bmbt_irec	old;
 
 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);
 
-	if (whichfork == XFS_ATTR_FORK)
-		state |= BMAP_ATTRFORK;
-	else if (whichfork == XFS_COW_FORK)
-		state |= BMAP_COWFORK;
-
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
 	ASSERT(del->br_blockcount > 0);
-	ep = xfs_iext_get_ext(ifp, *idx);
-	xfs_bmbt_get_all(ep, &got);
+	xfs_iext_get_extent(ifp, icur, &got);
 	ASSERT(got.br_startoff <= del->br_startoff);
 	del_endoff = del->br_startoff + del->br_blockcount;
 	got_endoff = got.br_startoff + got.br_blockcount;
 	ASSERT(got_endoff >= del_endoff);
-	delay = isnullstartblock(got.br_startblock);
-	ASSERT(isnullstartblock(del->br_startblock) == delay);
-	flags = 0;
+	ASSERT(!isnullstartblock(got.br_startblock));
 	qfield = 0;
 	error = 0;
+
 	/*
-	 * If deleting a real allocation, must free up the disk space.
+	 * If it's the case where the directory code is running with no block
+	 * reservation, and the deleted block is in the middle of its extent,
+	 * and the resulting insert of an extent would cause transformation to
+	 * btree format, then reject it.  The calling code will then swap blocks
+	 * around instead.  We have to do this now, rather than waiting for the
+	 * conversion to btree format, since the transaction will be dirty then.
 	 */
-	if (!delay) {
-		flags = XFS_ILOG_CORE;
-		/*
-		 * Realtime allocation.  Free it and record di_nblocks update.
-		 */
-		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
-			xfs_fsblock_t	bno;
-			xfs_filblks_t	len;
-
-			ASSERT(do_mod(del->br_blockcount,
-				      mp->m_sb.sb_rextsize) == 0);
-			ASSERT(do_mod(del->br_startblock,
-				      mp->m_sb.sb_rextsize) == 0);
-			bno = del->br_startblock;
-			len = del->br_blockcount;
-			do_div(bno, mp->m_sb.sb_rextsize);
-			do_div(len, mp->m_sb.sb_rextsize);
-			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
-			if (error)
-				goto done;
-			do_fx = 0;
-			nblks = len * mp->m_sb.sb_rextsize;
-			qfield = XFS_TRANS_DQ_RTBCOUNT;
-		}
-		/*
-		 * Ordinary allocation.
-		 */
-		else {
-			do_fx = 1;
-			nblks = del->br_blockcount;
-			qfield = XFS_TRANS_DQ_BCOUNT;
-		}
-		/*
-		 * Set up del_endblock and cur for later.
-		 */
-		del_endblock = del->br_startblock + del->br_blockcount;
-		if (cur) {
-			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-					got.br_startblock, got.br_blockcount,
-					&i)))
-				goto done;
-			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-		}
-		da_old = da_new = 0;
-	} else {
-		da_old = startblockval(got.br_startblock);
-		da_new = 0;
-		nblks = 0;
+	if (tp->t_blk_res == 0 &&
+	    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) >=
+			XFS_IFORK_MAXEXT(ip, whichfork) &&
+	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
+		return -ENOSPC;
+
+	flags = XFS_ILOG_CORE;
+	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+		xfs_fsblock_t	bno;
+		xfs_filblks_t	len;
+
+		ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0);
+		ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0);
+		bno = del->br_startblock;
+		len = del->br_blockcount;
+		do_div(bno, mp->m_sb.sb_rextsize);
+		do_div(len, mp->m_sb.sb_rextsize);
+		error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+		if (error)
+			goto done;
 		do_fx = 0;
+		nblks = len * mp->m_sb.sb_rextsize;
+		qfield = XFS_TRANS_DQ_RTBCOUNT;
+	} else {
+		do_fx = 1;
+		nblks = del->br_blockcount;
+		qfield = XFS_TRANS_DQ_BCOUNT;
 	}
 
-	/*
-	 * Set flag value to use in switch statement.
-	 * Left-contig is 2, right-contig is 1.
-	 */
-	switch (((got.br_startoff == del->br_startoff) << 1) |
-		(got_endoff == del_endoff)) {
-	case 3:
+	del_endblock = del->br_startblock + del->br_blockcount;
+	if (cur) {
+		error = xfs_bmbt_lookup_eq(cur, &got, &i);
+		if (error)
+			goto done;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+	}
+
+	if (got.br_startoff == del->br_startoff)
+		state |= BMAP_LEFT_FILLING;
+	if (got_endoff == del_endoff)
+		state |= BMAP_RIGHT_FILLING;
+
+	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_iext_remove(ip, *idx, 1,
-				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-		--*idx;
-		if (delay)
-			break;
-
+		xfs_iext_remove(ip, icur, state);
+		xfs_iext_prev(ifp, icur);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
 		flags |= XFS_ILOG_CORE;
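
For realtime files the branch above frees whole rt extents, so both the start block and the length are converted from filesystem blocks into rt-extent units before calling xfs_rtfree_extent(). A worked example (do_div() divides its first argument in place):

	/*
	 * Hypothetical: sb_rextsize = 4 fs blocks, and del covers
	 * br_startblock = 400, br_blockcount = 40.
	 */
	bno = 400;
	len = 40;
	do_div(bno, 4);		/* bno = 100: first rt extent to free */
	do_div(len, 4);		/* len = 10 rt extents */
	/* the quota delta stays in fs blocks: nblks = 10 * 4 = 40 */
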
@@ -5203,168 +4942,106 @@ xfs_bmap_del_extent(
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		break;
-
-	case 2:
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_startoff(ep, del_endoff);
-		temp = got.br_blockcount - del->br_blockcount;
-		xfs_bmbt_set_blockcount(ep, temp);
-		if (delay) {
-			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-				da_old);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-			da_new = temp;
-			break;
-		}
-		xfs_bmbt_set_startblock(ep, del_endblock);
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		got.br_startoff = del_endoff;
+		got.br_startblock = del_endblock;
+		got.br_blockcount -= del->br_blockcount;
+		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
 		}
-		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
-				got.br_blockcount - del->br_blockcount,
-				got.br_state)))
+		error = xfs_bmbt_update(cur, &got);
+		if (error)
 			goto done;
 		break;
-
-	case 1:
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Deleting the last part of the extent.
 		 */
-		temp = got.br_blockcount - del->br_blockcount;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
-		if (delay) {
-			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-				da_old);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-			da_new = temp;
-			break;
-		}
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+		got.br_blockcount -= del->br_blockcount;
+		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
 			flags |= xfs_ilog_fext(whichfork);
 			break;
 		}
-		if ((error = xfs_bmbt_update(cur, got.br_startoff,
-				got.br_startblock,
-				got.br_blockcount - del->br_blockcount,
-				got.br_state)))
+		error = xfs_bmbt_update(cur, &got);
+		if (error)
 			goto done;
 		break;
-
 	case 0:
 		/*
 		 * Deleting the middle of the extent.
 		 */
-		temp = del->br_startoff - got.br_startoff;
-		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-		xfs_bmbt_set_blockcount(ep, temp);
+		old = got;
+
+		got.br_blockcount = del->br_startoff - got.br_startoff;
+		xfs_iext_update_extent(ip, state, icur, &got);
+
 		new.br_startoff = del_endoff;
-		temp2 = got_endoff - del_endoff;
-		new.br_blockcount = temp2;
+		new.br_blockcount = got_endoff - del_endoff;
 		new.br_state = got.br_state;
-		if (!delay) {
-			new.br_startblock = del_endblock;
-			flags |= XFS_ILOG_CORE;
-			if (cur) {
-				if ((error = xfs_bmbt_update(cur,
-						got.br_startoff,
-						got.br_startblock, temp,
-						got.br_state)))
-					goto done;
-				if ((error = xfs_btree_increment(cur, 0, &i)))
-					goto done;
-				cur->bc_rec.b = new;
-				error = xfs_btree_insert(cur, &i);
-				if (error && error != -ENOSPC)
-					goto done;
+		new.br_startblock = del_endblock;
+
+		flags |= XFS_ILOG_CORE;
+		if (cur) {
+			error = xfs_bmbt_update(cur, &got);
+			if (error)
+				goto done;
+			error = xfs_btree_increment(cur, 0, &i);
+			if (error)
+				goto done;
+			cur->bc_rec.b = new;
+			error = xfs_btree_insert(cur, &i);
+			if (error && error != -ENOSPC)
+				goto done;
+			/*
+			 * If we got ENOSPC back from the btree insert, it
+			 * tried a split and we have a zero block reservation.
+			 * Fix up our state and return the error.
+			 */
+			if (error == -ENOSPC) {
 				/*
-				 * If get no-space back from btree insert,
-				 * it tried a split, and we have a zero
-				 * block reservation.
-				 * Fix up our state and return the error.
+				 * Reset the cursor, don't trust it after any
+				 * insert operation.
 				 */
-				if (error == -ENOSPC) {
-					/*
-					 * Reset the cursor, don't trust
-					 * it after any insert operation.
-					 */
-					if ((error = xfs_bmbt_lookup_eq(cur,
-							got.br_startoff,
-							got.br_startblock,
-							temp, &i)))
-						goto done;
-					XFS_WANT_CORRUPTED_GOTO(mp,
-								i == 1, done);
-					/*
-					 * Update the btree record back
-					 * to the original value.
-					 */
-					if ((error = xfs_bmbt_update(cur,
-							got.br_startoff,
-							got.br_startblock,
-							got.br_blockcount,
-							got.br_state)))
-						goto done;
-					/*
-					 * Reset the extent record back
-					 * to the original value.
-					 */
-					xfs_bmbt_set_blockcount(ep,
-						got.br_blockcount);
-					flags = 0;
-					error = -ENOSPC;
+				error = xfs_bmbt_lookup_eq(cur, &got, &i);
+				if (error)
 					goto done;
-				}
 				XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
-			} else
-				flags |= xfs_ilog_fext(whichfork);
-			XFS_IFORK_NEXT_SET(ip, whichfork,
-				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
-		} else {
-			xfs_filblks_t	stolen;
-			ASSERT(whichfork == XFS_DATA_FORK);
-
-			/*
-			 * Distribute the original indlen reservation across the
-			 * two new extents. Steal blocks from the deleted extent
-			 * if necessary. Stealing blocks simply fudges the
-			 * fdblocks accounting in xfs_bunmapi().
-			 */
-			temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
-			temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
-			stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
-						       del->br_blockcount);
-			da_new = temp + temp2 - stolen;
-			del->br_blockcount -= stolen;
-
-			/*
-			 * Set the reservation for each extent. Warn if either
-			 * is zero as this can lead to delalloc problems.
-			 */
-			WARN_ON_ONCE(!temp || !temp2);
-			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-			new.br_startblock = nullstartblock((int)temp2);
-		}
-		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
-		++*idx;
+				/*
+				 * Update the btree record back
+				 * to the original value.
+				 */
+				error = xfs_bmbt_update(cur, &old);
+				if (error)
+					goto done;
+				/*
+				 * Reset the extent record back
+				 * to the original value.
+				 */
+				xfs_iext_update_extent(ip, state, icur, &old);
+				flags = 0;
+				error = -ENOSPC;
+				goto done;
+			}
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+		} else
+			flags |= xfs_ilog_fext(whichfork);
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+		xfs_iext_next(ifp, icur);
+		xfs_iext_insert(ip, icur, &new, state);
 		break;
 	}
 
 	/* remove reverse mapping */
-	if (!delay) {
-		error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
-		if (error)
-			goto done;
-	}
+	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
+	if (error)
+		goto done;
 
 	/*
 	 * If we need to, add to list of extents to delete.
@@ -5390,13 +5067,6 @@ xfs_bmap_del_extent(
 	if (qfield && !(bflags & XFS_BMAPI_REMAP))
 		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
-	/*
-	 * Account for change in delayed indirect blocks.
-	 * Nothing to do for disk quota accounting here.
-	 */
-	ASSERT(da_old >= da_new);
-	if (da_old > da_new)
-		xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
 done:
 	*logflagsp = flags;
 	return error;
@@ -5412,7 +5082,7 @@ int						/* error */
 __xfs_bunmapi(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	struct xfs_inode	*ip,		/* incore inode */
-	xfs_fileoff_t		bno,		/* starting offset to unmap */
+	xfs_fileoff_t		start,		/* first file offset deleted */
 	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
 	int			flags,		/* misc flags */
 	xfs_extnum_t		nexts,		/* number of extents max */
@@ -5427,11 +5097,9 @@ __xfs_bunmapi(
 	xfs_bmbt_irec_t		got;		/* current extent record */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	int			isrt;		/* freeing in rt area */
-	xfs_extnum_t		lastx;		/* last extent index used */
 	int			logflags;	/* transaction logging flags */
 	xfs_extlen_t		mod;		/* rt extent offset */
 	xfs_mount_t		*mp;		/* mount structure */
-	xfs_fileoff_t		start;		/* first file offset deleted */
 	int			tmp_logflags;	/* partial logging flags */
 	int			wasdel;		/* was a delayed alloc extent */
 	int			whichfork;	/* data or attribute fork */
@@ -5439,8 +5107,11 @@ __xfs_bunmapi(
 	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
 	xfs_fileoff_t		max_len;
 	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
+	xfs_fileoff_t		end;
+	struct xfs_iext_cursor	icur;
+	bool			done = false;
 
-	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
+	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
 
 	whichfork = xfs_bmapi_whichfork(flags);
 	ASSERT(whichfork != XFS_COW_FORK);
@@ -5479,18 +5150,13 @@ __xfs_bunmapi(
 	}
 	XFS_STATS_INC(mp, xs_blk_unmap);
 	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-	start = bno;
-	bno = start + len - 1;
+	end = start + len;
 
-	/*
-	 * Check to see if the given block number is past the end of the
-	 * file, back up to the last block if so...
-	 */
-	if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
-		ASSERT(lastx > 0);
-		xfs_iext_get_extent(ifp, --lastx, &got);
-		bno = got.br_startoff + got.br_blockcount - 1;
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
+		*rlen = 0;
+		return 0;
 	}
+	end--;
 
 	logflags = 0;
 	if (ifp->if_flags & XFS_IFBROOT) {
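
__xfs_bunmapi() now walks the fork backwards from the end of the range. A sketch of the pattern, assuming (consistently with the end-- above) that xfs_iext_lookup_extent_before() finds the last extent starting before *end and trims *end down when that extent ends earlier:

	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got))
		return 0;			/* nothing mapped below end */
	end--;					/* last offset left to unmap */
	while (end >= start) {
		/* ... unmap the overlap of 'got' with [start, end] ... */
		end = got.br_startoff - 1;	/* continue left of 'got' */
		if (!xfs_iext_prev_extent(ifp, &icur, &got))
			break;			/* hit the front of the fork */
	}
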
@@ -5513,24 +5179,24 @@ __xfs_bunmapi(
 	}
 
 	extno = 0;
-	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+	while (end != (xfs_fileoff_t)-1 && end >= start &&
 	       (nexts == 0 || extno < nexts) && max_len > 0) {
 		/*
-		 * Is the found extent after a hole in which bno lives?
+		 * Is the found extent after a hole in which end lives?
 		 * Just back up to the previous extent, if so.
 		 */
-		if (got.br_startoff > bno) {
-			if (--lastx < 0)
-				break;
-			xfs_iext_get_extent(ifp, lastx, &got);
+		if (got.br_startoff > end &&
+		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
+			done = true;
+			break;
 		}
 		/*
 		 * Is the last block of this extent before the range
 		 * we're supposed to delete?  If so, we're done.
 		 */
-		bno = XFS_FILEOFF_MIN(bno,
+		end = XFS_FILEOFF_MIN(end,
 			got.br_startoff + got.br_blockcount - 1);
-		if (bno < start)
+		if (end < start)
 			break;
 		/*
 		 * Then deal with the (possibly delayed) allocated space
@@ -5555,8 +5221,8 @@ __xfs_bunmapi(
 			if (!wasdel)
 				del.br_startblock += start - got.br_startoff;
 		}
-		if (del.br_startoff + del.br_blockcount > bno + 1)
-			del.br_blockcount = bno + 1 - del.br_startoff;
+		if (del.br_startoff + del.br_blockcount > end + 1)
+			del.br_blockcount = end + 1 - del.br_startoff;
 
 		/* How much can we safely unmap? */
 		if (max_len < del.br_blockcount) {
@@ -5582,13 +5248,13 @@ __xfs_bunmapi(
 				 * This piece is unwritten, or we're not
 				 * using unwritten extents.  Skip over it.
 				 */
-				ASSERT(bno >= mod);
-				bno -= mod > del.br_blockcount ?
+				ASSERT(end >= mod);
+				end -= mod > del.br_blockcount ?
 					del.br_blockcount : mod;
-				if (bno < got.br_startoff) {
-					if (--lastx >= 0)
-						xfs_bmbt_get_all(xfs_iext_get_ext(
-							ifp, lastx), &got);
+				if (end < got.br_startoff &&
+				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
+					done = true;
+					break;
 				}
 				continue;
 			}
@@ -5609,7 +5275,7 @@ __xfs_bunmapi(
 			}
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-					whichfork, &lastx, &cur, &del,
+					whichfork, &icur, &cur, &del,
 					firstblock, dfops, &logflags);
 			if (error)
 				goto error0;
@@ -5634,10 +5300,13 @@ __xfs_bunmapi(
 				 * Can't make it unwritten.  There isn't
 				 * a full extent here so just skip it.
 				 */
-				ASSERT(bno >= del.br_blockcount);
-				bno -= del.br_blockcount;
-				if (got.br_startoff > bno && --lastx >= 0)
-					xfs_iext_get_extent(ifp, lastx, &got);
+				ASSERT(end >= del.br_blockcount);
+				end -= del.br_blockcount;
+				if (got.br_startoff > end &&
+				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
+					done = true;
+					break;
+				}
 				continue;
 			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
 				struct xfs_bmbt_irec	prev;
@@ -5648,8 +5317,8 @@ __xfs_bunmapi(
 				 * Unwrite the killed part of that one and
 				 * try again.
 				 */
-				ASSERT(lastx > 0);
-				xfs_iext_get_extent(ifp, lastx - 1, &prev);
+				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
+					ASSERT(0);
 				ASSERT(prev.br_state == XFS_EXT_NORM);
 				ASSERT(!isnullstartblock(prev.br_startblock));
 				ASSERT(del.br_startblock ==
@@ -5661,9 +5330,8 @@ __xfs_bunmapi(
 					prev.br_startoff = start;
 				}
 				prev.br_state = XFS_EXT_UNWRITTEN;
-				lastx--;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
-						ip, whichfork, &lastx, &cur,
+						ip, whichfork, &icur, &cur,
 						&prev, firstblock, dfops,
 						&logflags);
 				if (error)
@@ -5673,7 +5341,7 @@ __xfs_bunmapi(
 				ASSERT(del.br_state == XFS_EXT_NORM);
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
-						ip, whichfork, &lastx, &cur,
+						ip, whichfork, &icur, &cur,
 						&del, firstblock, dfops,
 						&logflags);
 				if (error)
@@ -5682,85 +5350,39 @@ __xfs_bunmapi(
 			}
 		}
 
-		/*
-		 * If it's the case where the directory code is running
-		 * with no block reservation, and the deleted block is in
-		 * the middle of its extent, and the resulting insert
-		 * of an extent would cause transformation to btree format,
-		 * then reject it.  The calling code will then swap
-		 * blocks around instead.
-		 * We have to do this now, rather than waiting for the
-		 * conversion to btree format, since the transaction
-		 * will be dirty.
-		 */
-		if (!wasdel && tp->t_blk_res == 0 &&
-		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-		    XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
-			XFS_IFORK_MAXEXT(ip, whichfork) &&
-		    del.br_startoff > got.br_startoff &&
-		    del.br_startoff + del.br_blockcount <
-		    got.br_startoff + got.br_blockcount) {
-			error = -ENOSPC;
-			goto error0;
+		if (wasdel) {
+			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
+					&got, &del);
+		} else {
+			error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
+					cur, &del, &tmp_logflags, whichfork,
+					flags);
+			logflags |= tmp_logflags;
 		}
 
-		/*
-		 * Unreserve quota and update realtime free space, if
-		 * appropriate. If delayed allocation, update the inode delalloc
-		 * counter now and wait to update the sb counters as
-		 * xfs_bmap_del_extent() might need to borrow some blocks.
-		 */
-		if (wasdel) {
-			ASSERT(startblockval(del.br_startblock) > 0);
-			if (isrt) {
-				xfs_filblks_t rtexts;
-
-				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
-				do_div(rtexts, mp->m_sb.sb_rextsize);
-				xfs_mod_frextents(mp, (int64_t)rtexts);
-				(void)xfs_trans_reserve_quota_nblks(NULL,
-					ip, -((long)del.br_blockcount), 0,
-					XFS_QMOPT_RES_RTBLKS);
-			} else {
-				(void)xfs_trans_reserve_quota_nblks(NULL,
-					ip, -((long)del.br_blockcount), 0,
-					XFS_QMOPT_RES_REGBLKS);
-			}
-			ip->i_delayed_blks -= del.br_blockcount;
-			if (cur)
-				cur->bc_private.b.flags |=
-					XFS_BTCUR_BPRV_WASDEL;
-		} else if (cur)
-			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
-
-		error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
-				&tmp_logflags, whichfork, flags);
-		logflags |= tmp_logflags;
 		if (error)
 			goto error0;
 
-		if (!isrt && wasdel)
-			xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
-
 		max_len -= del.br_blockcount;
-		bno = del.br_startoff - 1;
+		end = del.br_startoff - 1;
 nodelete:
 		/*
 		 * If not done go on to the next (previous) record.
 		 */
-		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
-			if (lastx >= 0) {
-				xfs_iext_get_extent(ifp, lastx, &got);
-				if (got.br_startoff > bno && --lastx >= 0)
-					xfs_iext_get_extent(ifp, lastx, &got);
+		if (end != (xfs_fileoff_t)-1 && end >= start) {
+			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
+			    (got.br_startoff > end &&
+			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
+				done = true;
+				break;
 			}
 			extno++;
 		}
 	}
-	if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0)
+	if (done || end == (xfs_fileoff_t)-1 || end < start)
 		*rlen = 0;
 	else
-		*rlen = bno - start + 1;
+		*rlen = end - start + 1;
 
 	/*
 	 * Convert to a btree if necessary.
@@ -5878,14 +5500,13 @@ xfs_bmse_merge(
 	struct xfs_inode		*ip,
 	int				whichfork,
 	xfs_fileoff_t			shift,		/* shift fsb */
-	int				current_ext,	/* idx of gotp */
+	struct xfs_iext_cursor		*icur,
 	struct xfs_bmbt_irec		*got,		/* extent to shift */
 	struct xfs_bmbt_irec		*left,		/* preceding extent */
 	struct xfs_btree_cur		*cur,
 	int				*logflags,	/* output */
 	struct xfs_defer_ops		*dfops)
 {
-	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_bmbt_irec		new;
 	xfs_filblks_t			blockcount;
 	int				error, i;
@@ -5913,8 +5534,7 @@ xfs_bmse_merge(
 	}
 
 	/* lookup and remove the extent to merge */
-	error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock,
-				   got->br_blockcount, &i);
+	error = xfs_bmbt_lookup_eq(cur, got, &i);
 	if (error)
 		return error;
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
@@ -5925,20 +5545,20 @@ xfs_bmse_merge(
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
 	/* lookup and update size of the previous extent */
-	error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock,
-				   left->br_blockcount, &i);
+	error = xfs_bmbt_lookup_eq(cur, left, &i);
 	if (error)
 		return error;
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
-	error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock,
-			        new.br_blockcount, new.br_state);
+	error = xfs_bmbt_update(cur, &new);
 	if (error)
 		return error;
 
 done:
-	xfs_iext_update_extent(ifp, current_ext - 1, &new);
-	xfs_iext_remove(ip, current_ext, 1, 0);
+	xfs_iext_remove(ip, icur, 0);
+	xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
+	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
+			&new);
 
 	/* update reverse mapping. rmap functions merge the rmaps for us */
 	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
@@ -5949,183 +5569,83 @@ done:
 	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
 }
 
-/*
- * Shift a single extent.
- */
-STATIC int
-xfs_bmse_shift_one(
-	struct xfs_inode		*ip,
-	int				whichfork,
-	xfs_fileoff_t			offset_shift_fsb,
-	int				*current_ext,
-	struct xfs_bmbt_irec		*got,
-	struct xfs_btree_cur		*cur,
-	int				*logflags,
-	enum shift_direction		direction,
-	struct xfs_defer_ops		*dfops)
+static int
+xfs_bmap_shift_update_extent(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_iext_cursor	*icur,
+	struct xfs_bmbt_irec	*got,
+	struct xfs_btree_cur	*cur,
+	int			*logflags,
+	struct xfs_defer_ops	*dfops,
+	xfs_fileoff_t		startoff)
 {
-	struct xfs_ifork		*ifp;
-	struct xfs_mount		*mp;
-	xfs_fileoff_t			startoff;
-	struct xfs_bmbt_irec		adj_irec, new;
-	int				error;
-	int				i;
-	int				total_extents;
-
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	total_extents = xfs_iext_count(ifp);
-
-	/* delalloc extents should be prevented by caller */
-	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
-
-	if (direction == SHIFT_LEFT) {
-		startoff = got->br_startoff - offset_shift_fsb;
-
-		/*
-		 * Check for merge if we've got an extent to the left,
-		 * otherwise make sure there's enough room at the start
-		 * of the file for the shift.
-		 */
-		if (!*current_ext) {
-			if (got->br_startoff < offset_shift_fsb)
-				return -EINVAL;
-			goto update_current_ext;
-		}
-
-		/*
-		 * grab the left extent and check for a large enough hole.
-		 */
-		xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
-		if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
-			return -EINVAL;
-
-		/* check whether to merge the extent or shift it down */
-		if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
-			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-					      *current_ext, got, &adj_irec,
-					      cur, logflags, dfops);
-		}
-	} else {
-		startoff = got->br_startoff + offset_shift_fsb;
-		/* nothing to move if this is the last extent */
-		if (*current_ext >= (total_extents - 1))
-			goto update_current_ext;
-
-		/*
-		 * If this is not the last extent in the file, make sure there
-		 * is enough room between current extent and next extent for
-		 * accommodating the shift.
-		 */
-		xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
-		if (startoff + got->br_blockcount > adj_irec.br_startoff)
-			return -EINVAL;
-
-		/*
-		 * Unlike a left shift (which involves a hole punch),
-		 * a right shift does not modify extent neighbors
-		 * in any way. We should never find mergeable extents
-		 * in this scenario. Check anyways and warn if we
-		 * encounter two extents that could be one.
-		 */
-		if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
-			WARN_ON_ONCE(1);
-	}
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	prev = *got;
+	int			error, i;
 
-	/*
-	 * Increment the extent index for the next iteration, update the start
-	 * offset of the in-core extent and update the btree if applicable.
-	 */
-update_current_ext:
 	*logflags |= XFS_ILOG_CORE;
 
-	new = *got;
-	new.br_startoff = startoff;
+	got->br_startoff = startoff;
 
 	if (cur) {
-		error = xfs_bmbt_lookup_eq(cur, got->br_startoff,
-				got->br_startblock, got->br_blockcount, &i);
+		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
 		if (error)
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
-		error = xfs_bmbt_update(cur, new.br_startoff,
-				new.br_startblock, new.br_blockcount,
-				new.br_state);
+		error = xfs_bmbt_update(cur, got);
 		if (error)
 			return error;
 	} else {
 		*logflags |= XFS_ILOG_DEXT;
 	}
 
-	xfs_iext_update_extent(ifp, *current_ext, &new);
-
-	if (direction == SHIFT_LEFT)
-		(*current_ext)++;
-	else
-		(*current_ext)--;
+	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
+			got);
 
 	/* update reverse mapping */
-	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
+	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
 	if (error)
 		return error;
-	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
+	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
 }
 
-/*
- * Shift extent records to the left/right to cover/create a hole.
- *
- * The maximum number of extents to be shifted in a single operation is
- * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
- * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
- * is the length by which each extent is shifted. If there is no hole to shift
- * the extents into, this will be considered invalid operation and we abort
- * immediately.
- */
 int
-xfs_bmap_shift_extents(
+xfs_bmap_collapse_extents(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		*next_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
-	int			*done,
+	bool			*done,
 	xfs_fileoff_t		stop_fsb,
 	xfs_fsblock_t		*firstblock,
-	struct xfs_defer_ops	*dfops,
-	enum shift_direction	direction,
-	int			num_exts)
+	struct xfs_defer_ops	*dfops)
 {
-	struct xfs_btree_cur		*cur = NULL;
-	struct xfs_bmbt_irec            got;
-	struct xfs_mount		*mp = ip->i_mount;
-	struct xfs_ifork		*ifp;
-	xfs_extnum_t			nexts = 0;
-	xfs_extnum_t			current_ext;
-	xfs_extnum_t			total_extents;
-	xfs_extnum_t			stop_extent;
-	int				error = 0;
-	int				whichfork = XFS_DATA_FORK;
-	int				logflags = 0;
+	int			whichfork = XFS_DATA_FORK;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_btree_cur	*cur = NULL;
+	struct xfs_bmbt_irec	got, prev;
+	struct xfs_iext_cursor	icur;
+	xfs_fileoff_t		new_startoff;
+	int			error = 0;
+	int			logflags = 0;
 
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
 	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
-		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
-				 XFS_ERRLEVEL_LOW, mp);
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
 
-	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		/* Read in all the extents */
 		error = xfs_iread_extents(tp, ip, whichfork);
 		if (error)
 			return error;
@@ -6138,107 +5658,165 @@ xfs_bmap_shift_extents(
 		cur->bc_private.b.flags = 0;
 	}
 
-	/*
-	 * There may be delalloc extents in the data fork before the range we
-	 * are collapsing out, so we cannot use the count of real extents here.
-	 * Instead we have to calculate it from the incore fork.
-	 */
-	total_extents = xfs_iext_count(ifp);
-	if (total_extents == 0) {
-		*done = 1;
+	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
+		*done = true;
 		goto del_cursor;
 	}
+	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
 
-	/*
-	 * In case of first right shift, we need to initialize next_fsb
-	 */
-	if (*next_fsb == NULLFSBLOCK) {
-		ASSERT(direction == SHIFT_RIGHT);
-
-		current_ext = total_extents - 1;
-		xfs_iext_get_extent(ifp, current_ext, &got);
-		if (stop_fsb > got.br_startoff) {
-			*done = 1;
+	new_startoff = got.br_startoff - offset_shift_fsb;
+	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
+		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
+			error = -EINVAL;
 			goto del_cursor;
 		}
-		*next_fsb = got.br_startoff;
+
+		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
+			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
+					&icur, &got, &prev, cur, &logflags,
+					dfops);
+			if (error)
+				goto del_cursor;
+			goto done;
+		}
 	} else {
-		/*
-		 * Look up the extent index for the fsb where we start shifting. We can
-		 * henceforth iterate with current_ext as extent list changes are locked
-		 * out via ilock.
-		 *
-		 * If next_fsb lies in a hole beyond which there are no extents we are
-		 * done.
-		 */
-		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
-				&got)) {
-			*done = 1;
+		if (got.br_startoff < offset_shift_fsb) {
+			error = -EINVAL;
 			goto del_cursor;
 		}
 	}
 
-	/* Lookup the extent index at which we have to stop */
-	if (direction == SHIFT_RIGHT) {
-		struct xfs_bmbt_irec s;
+	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
+			&logflags, dfops, new_startoff);
+	if (error)
+		goto del_cursor;
+
+done:
+	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
+		*done = true;
+		goto del_cursor;
+	}
+
+	*next_fsb = got.br_startoff;
+del_cursor:
+	if (cur)
+		xfs_btree_del_cursor(cur,
+			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	if (logflags)
+		xfs_trans_log_inode(tp, ip, logflags);
+	return error;
+}
+
+int
+xfs_bmap_insert_extents(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*next_fsb,
+	xfs_fileoff_t		offset_shift_fsb,
+	bool			*done,
+	xfs_fileoff_t		stop_fsb,
+	xfs_fsblock_t		*firstblock,
+	struct xfs_defer_ops	*dfops)
+{
+	int			whichfork = XFS_DATA_FORK;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_btree_cur	*cur = NULL;
+	struct xfs_bmbt_irec	got, next;
+	struct xfs_iext_cursor	icur;
+	xfs_fileoff_t		new_startoff;
+	int			error = 0;
+	int			logflags = 0;
+
+	if (unlikely(XFS_TEST_ERROR(
+	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
+
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
+
+	if (ifp->if_flags & XFS_IFBROOT) {
+		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.flags = 0;
+	}
 
-		xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s);
-		/* Make stop_extent exclusive of shift range */
-		stop_extent--;
-		if (current_ext <= stop_extent) {
-			error = -EIO;
+	if (*next_fsb == NULLFSBLOCK) {
+		xfs_iext_last(ifp, &icur);
+		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
+		    stop_fsb > got.br_startoff) {
+			*done = true;
 			goto del_cursor;
 		}
 	} else {
-		stop_extent = total_extents;
-		if (current_ext >= stop_extent) {
-			error = -EIO;
+		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
+			*done = true;
 			goto del_cursor;
 		}
 	}
+	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
 
-	while (nexts++ < num_exts) {
-		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
-					   &current_ext, &got, cur, &logflags,
-					   direction, dfops);
-		if (error)
+	if (stop_fsb >= got.br_startoff + got.br_blockcount) {
+		error = -EIO;
+		goto del_cursor;
+	}
+
+	new_startoff = got.br_startoff + offset_shift_fsb;
+	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
+		if (new_startoff + got.br_blockcount > next.br_startoff) {
+			error = -EINVAL;
 			goto del_cursor;
-		/*
-		 * If there was an extent merge during the shift, the extent
-		 * count can change. Update the total and grade the next record.
-		 */
-		if (direction == SHIFT_LEFT) {
-			total_extents = xfs_iext_count(ifp);
-			stop_extent = total_extents;
 		}
 
-		if (current_ext == stop_extent) {
-			*done = 1;
-			*next_fsb = NULLFSBLOCK;
-			break;
-		}
-		xfs_iext_get_extent(ifp, current_ext, &got);
+		/*
+		 * Unlike a left shift (which involves a hole punch), a right
+		 * shift does not modify extent neighbors in any way.  We should
+		 * never find mergeable extents in this scenario.  Check anyways
+		 * and warn if we encounter two extents that could be one.
+		 */
+		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
+			WARN_ON_ONCE(1);
 	}
 
-	if (!*done)
-		*next_fsb = got.br_startoff;
+	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
+			&logflags, dfops, new_startoff);
+	if (error)
+		goto del_cursor;
+
+	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
+	    stop_fsb >= got.br_startoff + got.br_blockcount) {
+		*done = true;
+		goto del_cursor;
+	}
 
+	*next_fsb = got.br_startoff;
 del_cursor:
 	if (cur)
 		xfs_btree_del_cursor(cur,
 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
-
 	return error;
 }
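
These two helpers are the new backends for fallocate(2)'s FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE modes, replacing the old bidirectional xfs_bmap_shift_extents(). A minimal userspace sketch of what drives them (the mount point and file are hypothetical; offset and length must be multiples of the filesystem block size or the kernel returns -EINVAL):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical file on an XFS mount */
	int fd = open("/mnt/xfs/testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Remove 64KiB at offset 1MiB; later extents shift left. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 1 << 20, 64 << 10))
		perror("collapse");

	/* Open a 64KiB hole at offset 1MiB; extents shift right. */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 1 << 20, 64 << 10))
		perror("insert");

	close(fd);
	return 0;
}
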
 
 /*
- * Splits an extent into two extents at split_fsb block such that it is
- * the first block of the current_ext. @current_ext is a target extent
- * to be split. @split_fsb is a block where the extents is split.
- * If split_fsb lies in a hole or the first block of extents, just return 0.
+ * Splits an extent into two extents at split_fsb block such that it is the
+ * first block of the current_ext. @ext is a target extent to be split.
+ * @split_fsb is the block where the extent is split.  If split_fsb lies in a
+ * hole or at the first block of an extent, just return 0.
  */
 STATIC int
 xfs_bmap_split_extent_at(
@@ -6255,7 +5833,7 @@ xfs_bmap_split_extent_at(
 	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_ifork		*ifp;
 	xfs_fsblock_t			gotblkcnt; /* new block count for got */
-	xfs_extnum_t			current_ext;
+	struct xfs_iext_cursor		icur;
 	int				error = 0;
 	int				logflags = 0;
 	int				i = 0;
@@ -6283,7 +5861,7 @@ xfs_bmap_split_extent_at(
 	/*
 	 * If there are no extents, or split_fsb lies in a hole, we are done.
 	 */
-	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) ||
+	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
 	    got.br_startoff >= split_fsb)
 		return 0;
 
@@ -6298,44 +5876,35 @@ xfs_bmap_split_extent_at(
 		cur->bc_private.b.firstblock = *firstfsb;
 		cur->bc_private.b.dfops = dfops;
 		cur->bc_private.b.flags = 0;
-		error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-				got.br_startblock,
-				got.br_blockcount,
-				&i);
+		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
 			goto del_cursor;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
 	}
 
 	got.br_blockcount = gotblkcnt;
-	xfs_iext_update_extent(ifp, current_ext, &got);
+	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
+			&got);
 
 	logflags = XFS_ILOG_CORE;
 	if (cur) {
-		error = xfs_bmbt_update(cur, got.br_startoff,
-				got.br_startblock,
-				got.br_blockcount,
-				got.br_state);
+		error = xfs_bmbt_update(cur, &got);
 		if (error)
 			goto del_cursor;
 	} else
 		logflags |= XFS_ILOG_DEXT;
 
 	/* Add new extent */
-	current_ext++;
-	xfs_iext_insert(ip, current_ext, 1, &new, 0);
+	xfs_iext_next(ifp, &icur);
+	xfs_iext_insert(ip, &icur, &new, 0);
 	XFS_IFORK_NEXT_SET(ip, whichfork,
 			   XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
 
 	if (cur) {
-		error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
-				new.br_startblock, new.br_blockcount,
-				&i);
+		error = xfs_bmbt_lookup_eq(cur, &new, &i);
 		if (error)
 			goto del_cursor;
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
-		cur->bc_rec.b.br_state = new.br_state;
-
 		error = xfs_btree_insert(cur, &i);
 		if (error)
 			goto del_cursor;

+ 31 - 35
fs/xfs/libxfs/xfs_bmap.h

@@ -43,7 +43,7 @@ struct xfs_bmalloca {
 	xfs_fsblock_t		blkno;	/* starting block of new extent */
 
 	struct xfs_btree_cur	*cur;	/* btree cursor */
-	xfs_extnum_t		idx;	/* current extent index */
+	struct xfs_iext_cursor	icur;	/* incore extent cursor */
 	int			nallocs;/* number of extents alloc'd */
 	int			logflags;/* flags for transaction logging */
 
@@ -113,6 +113,9 @@ struct xfs_extent_free_item
 /* Only convert delalloc space, don't allocate entirely new extents */
 #define XFS_BMAPI_DELALLOC	0x400
 
+/* Only convert unwritten extents, don't allocate new blocks */
+#define XFS_BMAPI_CONVERT_ONLY	0x800
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -124,7 +127,8 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
-	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }
+	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
+	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }
 
 
 static inline int xfs_bmapi_aflag(int w)
@@ -183,29 +187,6 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
 		!isnullstartblock(irec->br_startblock);
 }
 
-/*
- * This macro is used to determine how many extents will be shifted
- * in one write transaction. We could require two splits,
- * an extent move on the first and an extent merge on the second,
- * So it is proper that one extent is shifted inside write transaction
- * at a time.
- */
-#define XFS_BMAP_MAX_SHIFT_EXTENTS	1
-
-enum shift_direction {
-	SHIFT_LEFT = 0,
-	SHIFT_RIGHT,
-};
-
-#ifdef DEBUG
-void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
-		int whichfork, unsigned long caller_ip);
-#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
-	xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
-#else
-#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
-#endif
-
 void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 		xfs_filblks_t len);
 void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
@@ -222,8 +203,6 @@ int	xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
 int	xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
 		int whichfork);
 int	xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
-int	xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-		int whichfork);
 int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 		xfs_filblks_t len, struct xfs_bmbt_irec *mval,
 		int *nmap, int flags);
@@ -241,20 +220,25 @@ int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops, int *done);
 int	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
-		xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
+		struct xfs_bmbt_irec *del);
+void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
 		struct xfs_bmbt_irec *del);
-void	xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
-		struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
-int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+int	xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_defer_ops *dfops);
+int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops, enum shift_direction direction,
-		int num_exts);
+		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_defer_ops *dfops);
 int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
-		struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof);
+		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
+		int eof);
 
 enum xfs_bmap_intent_type {
 	XFS_BMAP_MAP = 1,
@@ -278,4 +262,16 @@ int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
 
+static inline int xfs_bmap_fork_to_state(int whichfork)
+{
+	switch (whichfork) {
+	case XFS_ATTR_FORK:
+		return BMAP_ATTRFORK;
+	case XFS_COW_FORK:
+		return BMAP_COWFORK;
+	default:
+		return 0;
+	}
+}
+
 #endif	/* __XFS_BMAP_H__ */

+ 29 - 221
fs/xfs/libxfs/xfs_bmap_btree.c

@@ -37,22 +37,6 @@
 #include "xfs_cksum.h"
 #include "xfs_rmap.h"
 
-/*
- * Determine the extent state.
- */
-/* ARGSUSED */
-STATIC xfs_exntst_t
-xfs_extent_state(
-	xfs_filblks_t		blks,
-	int			extent_flag)
-{
-	if (extent_flag) {
-		ASSERT(blks != 0);	/* saved for DMIG */
-		return XFS_EXT_UNWRITTEN;
-	}
-	return XFS_EXT_NORM;
-}
-
 /*
  * Convert on-disk form of btree root to in-memory form.
  */
@@ -87,84 +71,21 @@ xfs_bmdr_to_bmbt(
 	memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
 }
 
-/*
- * Convert a compressed bmap extent record to an uncompressed form.
- * This code must be in sync with the routines xfs_bmbt_get_startoff,
- * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
- */
-STATIC void
-__xfs_bmbt_get_all(
-		uint64_t l0,
-		uint64_t l1,
-		xfs_bmbt_irec_t *s)
-{
-	int	ext_flag;
-	xfs_exntst_t st;
-
-	ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
-	s->br_startoff = ((xfs_fileoff_t)l0 &
-			   xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-	s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
-			   (((xfs_fsblock_t)l1) >> 21);
-	s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
-	/* This is xfs_extent_state() in-line */
-	if (ext_flag) {
-		ASSERT(s->br_blockcount != 0);	/* saved for DMIG */
-		st = XFS_EXT_UNWRITTEN;
-	} else
-		st = XFS_EXT_NORM;
-	s->br_state = st;
-}
-
 void
-xfs_bmbt_get_all(
-	xfs_bmbt_rec_host_t *r,
-	xfs_bmbt_irec_t *s)
-{
-	__xfs_bmbt_get_all(r->l0, r->l1, s);
-}
-
-/*
- * Extract the blockcount field from an in memory bmap extent record.
- */
-xfs_filblks_t
-xfs_bmbt_get_blockcount(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
-}
-
-/*
- * Extract the startblock field from an in memory bmap extent record.
- */
-xfs_fsblock_t
-xfs_bmbt_get_startblock(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
-	       (((xfs_fsblock_t)r->l1) >> 21);
-}
-
-/*
- * Extract the startoff field from an in memory bmap extent record.
- */
-xfs_fileoff_t
-xfs_bmbt_get_startoff(
-	xfs_bmbt_rec_host_t	*r)
-{
-	return ((xfs_fileoff_t)r->l0 &
-		 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-}
-
-xfs_exntst_t
-xfs_bmbt_get_state(
-	xfs_bmbt_rec_host_t	*r)
-{
-	int	ext_flag;
-
-	ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
-	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
-				ext_flag);
+xfs_bmbt_disk_get_all(
+	struct xfs_bmbt_rec	*rec,
+	struct xfs_bmbt_irec	*irec)
+{
+	uint64_t		l0 = get_unaligned_be64(&rec->l0);
+	uint64_t		l1 = get_unaligned_be64(&rec->l1);
+
+	irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+	irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21);
+	irec->br_blockcount = l1 & xfs_mask64lo(21);
+	if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN))
+		irec->br_state = XFS_EXT_UNWRITTEN;
+	else
+		irec->br_state = XFS_EXT_NORM;
 }
 
 /*
@@ -188,142 +109,29 @@ xfs_bmbt_disk_get_startoff(
 		 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
 }
 
-
-/*
- * Set all the fields in a bmap extent record from the arguments.
- */
-void
-xfs_bmbt_set_allf(
-	xfs_bmbt_rec_host_t	*r,
-	xfs_fileoff_t		startoff,
-	xfs_fsblock_t		startblock,
-	xfs_filblks_t		blockcount,
-	xfs_exntst_t		state)
-{
-	int		extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-	ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-	ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-	ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-	ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-	r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-		((xfs_bmbt_rec_base_t)startoff << 9) |
-		((xfs_bmbt_rec_base_t)startblock >> 43);
-	r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
-		((xfs_bmbt_rec_base_t)blockcount &
-		(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-}
-
 /*
  * Set all the fields in a bmap extent record from the uncompressed form.
  */
 void
-xfs_bmbt_set_all(
-	xfs_bmbt_rec_host_t *r,
-	xfs_bmbt_irec_t	*s)
-{
-	xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
-			     s->br_blockcount, s->br_state);
-}
-
-
-/*
- * Set all the fields in a disk format bmap extent record from the arguments.
- */
-void
-xfs_bmbt_disk_set_allf(
-	xfs_bmbt_rec_t		*r,
-	xfs_fileoff_t		startoff,
-	xfs_fsblock_t		startblock,
-	xfs_filblks_t		blockcount,
-	xfs_exntst_t		state)
-{
-	int			extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-	ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-	ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-	ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-	ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-	r->l0 = cpu_to_be64(
-		((xfs_bmbt_rec_base_t)extent_flag << 63) |
-		 ((xfs_bmbt_rec_base_t)startoff << 9) |
-		 ((xfs_bmbt_rec_base_t)startblock >> 43));
-	r->l1 = cpu_to_be64(
-		((xfs_bmbt_rec_base_t)startblock << 21) |
-		 ((xfs_bmbt_rec_base_t)blockcount &
-		  (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-}
-
-/*
- * Set all the fields in a bmap extent record from the uncompressed form.
- */
-STATIC void
 xfs_bmbt_disk_set_all(
-	xfs_bmbt_rec_t	*r,
-	xfs_bmbt_irec_t *s)
-{
-	xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
-				  s->br_blockcount, s->br_state);
-}
-
-/*
- * Set the blockcount field in a bmap extent record.
- */
-void
-xfs_bmbt_set_blockcount(
-	xfs_bmbt_rec_host_t *r,
-	xfs_filblks_t	v)
+	struct xfs_bmbt_rec	*r,
+	struct xfs_bmbt_irec	*s)
 {
-	ASSERT((v & xfs_mask64hi(43)) == 0);
-	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
-		  (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
-}
-
-/*
- * Set the startblock field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startblock(
-	xfs_bmbt_rec_host_t *r,
-	xfs_fsblock_t	v)
-{
-	ASSERT((v & xfs_mask64hi(12)) == 0);
-	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
-		  (xfs_bmbt_rec_base_t)(v >> 43);
-	r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
-		  (xfs_bmbt_rec_base_t)(v << 21);
-}
+	int			extent_flag = (s->br_state != XFS_EXT_NORM);
 
-/*
- * Set the startoff field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startoff(
-	xfs_bmbt_rec_host_t *r,
-	xfs_fileoff_t	v)
-{
-	ASSERT((v & xfs_mask64hi(9)) == 0);
-	r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
-		((xfs_bmbt_rec_base_t)v << 9) |
-		  (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
-}
+	ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN);
+	ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)));
+	ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)));
+	ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)));
 
-/*
- * Set the extent state field in a bmap extent record.
- */
-void
-xfs_bmbt_set_state(
-	xfs_bmbt_rec_host_t *r,
-	xfs_exntst_t	v)
-{
-	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
-	if (v == XFS_EXT_NORM)
-		r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
-	else
-		r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
+	put_unaligned_be64(
+		((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0);
+	put_unaligned_be64(
+		((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+		 ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		  (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1);
 }
 
 /*
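
The disk extent record that xfs_bmbt_disk_set_all() and xfs_bmbt_disk_get_all() now handle directly packs four fields into 128 bits: a 1-bit unwritten flag, a 54-bit file offset, a 52-bit start block, and a 21-bit block count. A standalone sketch of that packing, using host-order words for clarity (the real routines additionally convert to and from big-endian storage with get_unaligned_be64()/put_unaligned_be64()):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct irec {
	uint64_t	startoff;	/* 54 bits */
	uint64_t	startblock;	/* 52 bits */
	uint64_t	blockcount;	/* 21 bits */
	int		unwritten;	/* 1 bit */
};

static void pack(uint64_t *l0, uint64_t *l1, const struct irec *s)
{
	*l0 = ((uint64_t)(s->unwritten != 0) << 63) |
	      (s->startoff << 9) |
	      (s->startblock >> 43);
	*l1 = (s->startblock << 21) |
	      (s->blockcount & ((1ULL << 21) - 1));
}

static void unpack(uint64_t l0, uint64_t l1, struct irec *s)
{
	s->unwritten  = l0 >> 63;
	s->startoff   = (l0 >> 9) & ((1ULL << 54) - 1);
	s->startblock = ((l0 & ((1ULL << 9) - 1)) << 43) | (l1 >> 21);
	s->blockcount = l1 & ((1ULL << 21) - 1);
}

int main(void)
{
	struct irec in = { 123456, 789012, 42, 1 }, out;
	uint64_t l0, l1;

	pack(&l0, &l1, &in);
	unpack(l0, l1, &out);
	assert(in.startoff == out.startoff);
	assert(in.startblock == out.startblock);
	assert(in.blockcount == out.blockcount);
	printf("ok: off=%llu blk=%llu len=%llu unwritten=%d\n",
	       (unsigned long long)out.startoff,
	       (unsigned long long)out.startblock,
	       (unsigned long long)out.blockcount, out.unwritten);
	return 0;
}
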

+ 4 - 18
fs/xfs/libxfs/xfs_bmap_btree.h

@@ -98,25 +98,11 @@ struct xfs_trans;
  */
 extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
 			struct xfs_btree_block *, int);
-extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
-extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
-extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
-extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
 
+void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s);
 extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
 extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-
-extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
-extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
-			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
-extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v);
-extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
-extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
-extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
-
-extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
-			xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
+extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
 
 extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
 			xfs_bmdr_block_t *, int);
@@ -136,9 +122,9 @@ extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
  * Check that the extent does not contain an invalid unwritten extent flag.
  */
 static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
-		struct xfs_bmbt_rec_host *ep)
+		struct xfs_bmbt_irec *irec)
 {
-	if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0)
+	if (irec->br_state == XFS_EXT_NORM)
 		return true;
 	if (whichfork == XFS_DATA_FORK &&
 	    xfs_sb_version_hasextflgbit(&mp->m_sb))

+ 149 - 110
fs/xfs/libxfs/xfs_btree.c

@@ -29,6 +29,7 @@
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
 #include "xfs_btree.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
@@ -63,44 +64,63 @@ xfs_btree_magic(
 	return magic;
 }
 
-STATIC int				/* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lblock(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	struct xfs_btree_block	*block,	/* btree long form block pointer */
-	int			level,	/* level of the btree block */
-	struct xfs_buf		*bp)	/* buffer for block, if any */
+/*
+ * Check a long btree block header.  Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+xfs_failaddr_t
+__xfs_btree_check_lblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
 {
-	int			lblock_ok = 1; /* block passes checks */
-	struct xfs_mount	*mp;	/* file system mount point */
+	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc;
-
-	mp = cur->bc_mp;
-	crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
 
 	if (crc) {
-		lblock_ok = lblock_ok &&
-			uuid_equal(&block->bb_u.l.bb_uuid,
-				   &mp->m_sb.sb_meta_uuid) &&
-			block->bb_u.l.bb_blkno == cpu_to_be64(
-				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
+			return __this_address;
+		if (block->bb_u.l.bb_blkno !=
+		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+			return __this_address;
+		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
+			return __this_address;
 	}
 
-	lblock_ok = lblock_ok &&
-		be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
-		be16_to_cpu(block->bb_level) == level &&
-		be16_to_cpu(block->bb_numrecs) <=
-			cur->bc_ops->get_maxrecs(cur, level) &&
-		block->bb_u.l.bb_leftsib &&
-		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
-		 XFS_FSB_SANITY_CHECK(mp,
-			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
-		block->bb_u.l.bb_rightsib &&
-		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
-		 XFS_FSB_SANITY_CHECK(mp,
-			be64_to_cpu(block->bb_u.l.bb_rightsib)));
-
-	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+		return __this_address;
+	if (be16_to_cpu(block->bb_level) != level)
+		return __this_address;
+	if (be16_to_cpu(block->bb_numrecs) >
+	    cur->bc_ops->get_maxrecs(cur, level))
+		return __this_address;
+	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
+	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
+			level + 1))
+		return __this_address;
+	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
+	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
+			level + 1))
+		return __this_address;
+
+	return NULL;
+}
+
+/* Check a long btree block header. */
+static int
+xfs_btree_check_lblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+	xfs_failaddr_t		fa;
+
+	fa = __xfs_btree_check_lblock(cur, block, level, bp);
+	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
 			XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -110,48 +130,61 @@ xfs_btree_check_lblock(
 	return 0;
 }
 
-STATIC int				/* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sblock(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	struct xfs_btree_block	*block,	/* btree short form block pointer */
-	int			level,	/* level of the btree block */
-	struct xfs_buf		*bp)	/* buffer containing block */
+/*
+ * Check a short btree block header.  Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+xfs_failaddr_t
+__xfs_btree_check_sblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
 {
-	struct xfs_mount	*mp;	/* file system mount point */
-	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
-	struct xfs_agf		*agf;	/* ag. freespace structure */
-	xfs_agblock_t		agflen;	/* native ag. freespace length */
-	int			sblock_ok = 1; /* block passes checks */
+	struct xfs_mount	*mp = cur->bc_mp;
 	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc;
-
-	mp = cur->bc_mp;
-	crc = xfs_sb_version_hascrc(&mp->m_sb);
-	agbp = cur->bc_private.a.agbp;
-	agf = XFS_BUF_TO_AGF(agbp);
-	agflen = be32_to_cpu(agf->agf_length);
+	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
 
 	if (crc) {
-		sblock_ok = sblock_ok &&
-			uuid_equal(&block->bb_u.s.bb_uuid,
-				   &mp->m_sb.sb_meta_uuid) &&
-			block->bb_u.s.bb_blkno == cpu_to_be64(
-				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+			return __this_address;
+		if (block->bb_u.s.bb_blkno !=
+		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+			return __this_address;
 	}
 
-	sblock_ok = sblock_ok &&
-		be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) &&
-		be16_to_cpu(block->bb_level) == level &&
-		be16_to_cpu(block->bb_numrecs) <=
-			cur->bc_ops->get_maxrecs(cur, level) &&
-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
-		block->bb_u.s.bb_leftsib &&
-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
-		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
-		block->bb_u.s.bb_rightsib;
-
-	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
+	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+		return __this_address;
+	if (be16_to_cpu(block->bb_level) != level)
+		return __this_address;
+	if (be16_to_cpu(block->bb_numrecs) >
+	    cur->bc_ops->get_maxrecs(cur, level))
+		return __this_address;
+	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
+			level + 1))
+		return __this_address;
+	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
+			level + 1))
+		return __this_address;
+
+	return NULL;
+}
+
+/* Check a short btree block header. */
+STATIC int
+xfs_btree_check_sblock(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = cur->bc_mp;
+	xfs_failaddr_t		fa;
+
+	fa = __xfs_btree_check_sblock(cur, block, level, bp);
+	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
 			XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -177,59 +210,53 @@ xfs_btree_check_block(
 		return xfs_btree_check_sblock(cur, block, level, bp);
 }
 
-/*
- * Check that (long) pointer is ok.
- */
-int					/* error (0 or EFSCORRUPTED) */
+/* Check that this long pointer is valid and points within the fs. */
+bool
 xfs_btree_check_lptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_fsblock_t		bno,	/* btree block disk address */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,
+	xfs_fsblock_t		fsbno,
+	int			level)
 {
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
-		level > 0 &&
-		bno != NULLFSBLOCK &&
-		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
-	return 0;
+	if (level <= 0)
+		return false;
+	return xfs_verify_fsbno(cur->bc_mp, fsbno);
 }
 
-#ifdef DEBUG
-/*
- * Check that (short) pointer is ok.
- */
-STATIC int				/* error (0 or EFSCORRUPTED) */
+/* Check that this short pointer is valid and points within the AG. */
+bool
 xfs_btree_check_sptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_agblock_t		bno,	/* btree block disk address */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		agbno,
+	int			level)
 {
-	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;
-
-	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
-		level > 0 &&
-		bno != NULLAGBLOCK &&
-		bno != 0 &&
-		bno < agblocks);
-	return 0;
+	if (level <= 0)
+		return false;
+	return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
 }
 
+#ifdef DEBUG
 /*
- * Check that block ptr is ok.
+ * Check that a given (indexed) btree pointer at a certain level of a
+ * btree is valid and doesn't point past where it should.
  */
-STATIC int				/* error (0 or EFSCORRUPTED) */
+static int
 xfs_btree_check_ptr(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	union xfs_btree_ptr	*ptr,	/* btree block disk address */
-	int			index,	/* offset from ptr to check */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	int			index,
+	int			level)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		return xfs_btree_check_lptr(cur,
-				be64_to_cpu((&ptr->l)[index]), level);
+		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+				xfs_btree_check_lptr(cur,
+					be64_to_cpu((&ptr->l)[index]), level));
 	} else {
-		return xfs_btree_check_sptr(cur,
-				be32_to_cpu((&ptr->s)[index]), level);
+		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
+				xfs_btree_check_sptr(cur,
+					be32_to_cpu((&ptr->s)[index]), level));
 	}
+
+	return 0;
 }
 #endif
 
@@ -1027,7 +1054,7 @@ xfs_btree_setbuf(
 	}
 }
 
-STATIC int
+bool
 xfs_btree_ptr_is_null(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*ptr)
@@ -1052,7 +1079,7 @@ xfs_btree_set_ptr_null(
 /*
  * Get/set/init sibling pointers
  */
-STATIC void
+void
 xfs_btree_get_sibling(
 	struct xfs_btree_cur	*cur,
 	struct xfs_btree_block	*block,
@@ -2001,7 +2028,7 @@ error0:
 }
 
 /* Find the high key storage area from a regular key. */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_high_key_from_key(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -2075,7 +2102,7 @@ xfs_btree_get_node_keys(
 }
 
 /* Derive the keys for any btree block. */
-STATIC void
+void
 xfs_btree_get_keys(
 	struct xfs_btree_cur	*cur,
 	struct xfs_btree_block	*block,
@@ -4914,3 +4941,15 @@ xfs_btree_count_blocks(
 	return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
 			blocks);
 }
+
+/* Compare two btree pointers. */
+int64_t
+xfs_btree_diff_two_ptrs(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*a,
+	const union xfs_btree_ptr	*b)
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
+	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
+}
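
The xfs_failaddr_t return convention introduced here replaces the old all-in-one boolean: each validation helper hands back the code address of the first failing check (via __this_address), so corruption reports can identify exactly which test tripped. A rough userspace sketch of the idiom, relying on the GCC/Clang label-address extension (the kernel's actual __this_address also inserts a compiler barrier):

#include <stdint.h>
#include <stdio.h>

typedef void *xfs_failaddr_t;

/* simplified; the kernel version also issues barrier() at the label */
#define __this_address ({ __label__ __here__; __here__: &&__here__; })

static xfs_failaddr_t check_magic(uint32_t magic)
{
	if (magic != 0x58465342)	/* "XFSB" */
		return __this_address;	/* address of the failed check */
	return NULL;			/* everything ok */
}

int main(void)
{
	xfs_failaddr_t fa = check_magic(0);

	if (fa)
		printf("check failed at %p\n", fa);
	return 0;
}
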

+ 30 - 2
fs/xfs/libxfs/xfs_btree.h

@@ -255,6 +255,14 @@ typedef struct xfs_btree_cur
  */
 #define	XFS_BUF_TO_BLOCK(bp)	((struct xfs_btree_block *)((bp)->b_addr))
 
+/*
+ * Internal long and short btree block checks.  They return NULL if the
+ * block is ok or the address of the failed check otherwise.
+ */
+xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, int level, struct xfs_buf *bp);
+xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, int level, struct xfs_buf *bp);
 
 /*
  * Check that block header is ok.
@@ -269,10 +277,19 @@ xfs_btree_check_block(
 /*
  * Check that (long) pointer is ok.
  */
-int					/* error (0 or EFSCORRUPTED) */
+bool					/* true if ok, false if bad */
 xfs_btree_check_lptr(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_fsblock_t		ptr,	/* btree block disk address */
+	xfs_fsblock_t		fsbno,	/* btree block disk address */
+	int			level);	/* btree block level */
+
+/*
+ * Check that (short) pointer is ok.
+ */
+bool					/* true if ok, false if bad */
+xfs_btree_check_sptr(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		agbno,	/* btree block disk address */
 	int			level);	/* btree block level */
 
 /*
@@ -517,5 +534,16 @@ int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
 		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
 struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
 		int level, struct xfs_buf **bpp);
+bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr);
+int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
+				const union xfs_btree_ptr *a,
+				const union xfs_btree_ptr *b);
+void xfs_btree_get_sibling(struct xfs_btree_cur *cur,
+			   struct xfs_btree_block *block,
+			   union xfs_btree_ptr *ptr, int lr);
+void xfs_btree_get_keys(struct xfs_btree_cur *cur,
+		struct xfs_btree_block *block, union xfs_btree_key *key);
+union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
+		union xfs_btree_key *key);
 
 #endif	/* __XFS_BTREE_H__ */

+ 21 - 1
fs/xfs/libxfs/xfs_da_btree.c

@@ -1466,6 +1466,7 @@ xfs_da3_node_lookup_int(
 	int			max;
 	int			error;
 	int			retval;
+	unsigned int		expected_level = 0;
 	struct xfs_inode	*dp = state->args->dp;
 
 	args = state->args;
@@ -1474,7 +1475,7 @@ xfs_da3_node_lookup_int(
 	 * Descend thru the B-tree searching each level for the right
 	 * node to use, until the right hashval is found.
 	 */
-	blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
+	blkno = args->geo->leafblk;
 	for (blk = &state->path.blk[0], state->path.active = 1;
 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
 			 blk++, state->path.active++) {
@@ -1517,6 +1518,18 @@ xfs_da3_node_lookup_int(
 		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
 		btree = dp->d_ops->node_tree_p(node);
 
+		/* Tree taller than we can handle; bail out! */
+		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
+			return -EFSCORRUPTED;
+
+		/* Check the level from the root. */
+		if (blkno == args->geo->leafblk)
+			expected_level = nodehdr.level - 1;
+		else if (expected_level != nodehdr.level)
+			return -EFSCORRUPTED;
+		else
+			expected_level--;
+
 		max = nodehdr.count;
 		blk->hashval = be32_to_cpu(btree[max - 1].hashval);
 
@@ -1562,8 +1575,15 @@ xfs_da3_node_lookup_int(
 			blk->index = probe;
 			blkno = be32_to_cpu(btree[probe].before);
 		}
+
+		/* We can't point back to the root. */
+		if (blkno == args->geo->leafblk)
+			return -EFSCORRUPTED;
 	}
 
+	if (expected_level != 0)
+		return -EFSCORRUPTED;
+
 	/*
 	 * A leaf block that ends in the hashval that we are interested in
 	 * (final hashval == search hashval) means that the next block may

+ 7 - 17
fs/xfs/libxfs/xfs_dir2.c

@@ -30,6 +30,8 @@
 #include "xfs_bmap.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
+#include "xfs_ialloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 
@@ -38,7 +40,9 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
 /*
  * Convert inode mode to directory entry filetype
  */
-unsigned char xfs_mode_to_ftype(int mode)
+unsigned char
+xfs_mode_to_ftype(
+	int		mode)
 {
 	switch (mode & S_IFMT) {
 	case S_IFREG:
@@ -202,22 +206,8 @@ xfs_dir_ino_validate(
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)
 {
-	xfs_agblock_t	agblkno;
-	xfs_agino_t	agino;
-	xfs_agnumber_t	agno;
-	int		ino_ok;
-	int		ioff;
-
-	agno = XFS_INO_TO_AGNO(mp, ino);
-	agblkno = XFS_INO_TO_AGBNO(mp, ino);
-	ioff = XFS_INO_TO_OFFSET(mp, ino);
-	agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
-	ino_ok =
-		agno < mp->m_sb.sb_agcount &&
-		agblkno < mp->m_sb.sb_agblocks &&
-		agblkno != 0 &&
-		ioff < (1 << mp->m_sb.sb_inopblog) &&
-		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
+	bool		ino_ok = xfs_verify_dir_ino(mp, ino);
+
 	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
 		xfs_warn(mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);

+ 17 - 0
fs/xfs/libxfs/xfs_dir2.h

@@ -324,4 +324,21 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
 		  sizeof(struct xfs_dir2_leaf_tail));
 }
 
+/*
+ * The Linux API doesn't pass the total size of the buffer we read
+ * into down to the filesystem.  With the filldir concept it's not
+ * needed for correct operation, but the XFS dir2 leaf code wants an
+ * estimate of the buffer size to calculate its readahead window and
+ * size the buffers used for mapping to
+ * physical blocks.
+ *
+ * Try to give it an estimate that's good enough; maybe at some
+ * point we can change the ->readdir prototype to include the
+ * buffer size.  For now we use the current glibc buffer size.
+ * musl libc hardcodes 2k and dietlibc uses PAGE_SIZE.
+ */
+#define XFS_READDIR_BUFSIZE	(32768)
+
+unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
+
 #endif	/* __XFS_DIR2_H__ */

+ 106 - 0
fs/xfs/libxfs/xfs_errortag.h

@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (C) 2017 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_ERRORTAG_H_
+#define __XFS_ERRORTAG_H_
+
+/*
+ * error injection tags - the labels can be anything you want
+ * but each tag should have its own unique number
+ */
+
+#define XFS_ERRTAG_NOERROR				0
+#define XFS_ERRTAG_IFLUSH_1				1
+#define XFS_ERRTAG_IFLUSH_2				2
+#define XFS_ERRTAG_IFLUSH_3				3
+#define XFS_ERRTAG_IFLUSH_4				4
+#define XFS_ERRTAG_IFLUSH_5				5
+#define XFS_ERRTAG_IFLUSH_6				6
+#define XFS_ERRTAG_DA_READ_BUF				7
+#define XFS_ERRTAG_BTREE_CHECK_LBLOCK			8
+#define XFS_ERRTAG_BTREE_CHECK_SBLOCK			9
+#define XFS_ERRTAG_ALLOC_READ_AGF			10
+#define XFS_ERRTAG_IALLOC_READ_AGI			11
+#define XFS_ERRTAG_ITOBP_INOTOBP			12
+#define XFS_ERRTAG_IUNLINK				13
+#define XFS_ERRTAG_IUNLINK_REMOVE			14
+#define XFS_ERRTAG_DIR_INO_VALIDATE			15
+#define XFS_ERRTAG_BULKSTAT_READ_CHUNK			16
+#define XFS_ERRTAG_IODONE_IOERR				17
+#define XFS_ERRTAG_STRATREAD_IOERR			18
+#define XFS_ERRTAG_STRATCMPL_IOERR			19
+#define XFS_ERRTAG_DIOWRITE_IOERR			20
+#define XFS_ERRTAG_BMAPIFORMAT				21
+#define XFS_ERRTAG_FREE_EXTENT				22
+#define XFS_ERRTAG_RMAP_FINISH_ONE			23
+#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE		24
+#define XFS_ERRTAG_REFCOUNT_FINISH_ONE			25
+#define XFS_ERRTAG_BMAP_FINISH_ONE			26
+#define XFS_ERRTAG_AG_RESV_CRITICAL			27
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are silently dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES				28
+#define XFS_ERRTAG_LOG_BAD_CRC				29
+#define XFS_ERRTAG_LOG_ITEM_PIN				30
+#define XFS_ERRTAG_BUF_LRU_REF				31
+#define XFS_ERRTAG_MAX					32
+
+/*
+ * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
+ */
+#define XFS_RANDOM_DEFAULT				100
+#define XFS_RANDOM_IFLUSH_1				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_2				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_3				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_4				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_5				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IFLUSH_6				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_DA_READ_BUF				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_BTREE_CHECK_LBLOCK			(XFS_RANDOM_DEFAULT/4)
+#define XFS_RANDOM_BTREE_CHECK_SBLOCK			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_ALLOC_READ_AGF			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IALLOC_READ_AGI			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_ITOBP_INOTOBP			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IUNLINK				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IUNLINK_REMOVE			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_DIR_INO_VALIDATE			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_BULKSTAT_READ_CHUNK			XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_IODONE_IOERR				(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_STRATREAD_IOERR			(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_STRATCMPL_IOERR			(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_DIOWRITE_IOERR			(XFS_RANDOM_DEFAULT/10)
+#define XFS_RANDOM_BMAPIFORMAT				XFS_RANDOM_DEFAULT
+#define XFS_RANDOM_FREE_EXTENT				1
+#define XFS_RANDOM_RMAP_FINISH_ONE			1
+#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE		1
+#define XFS_RANDOM_REFCOUNT_FINISH_ONE			1
+#define XFS_RANDOM_BMAP_FINISH_ONE			1
+#define XFS_RANDOM_AG_RESV_CRITICAL			4
+#define XFS_RANDOM_DROP_WRITES				1
+#define XFS_RANDOM_LOG_BAD_CRC				1
+#define XFS_RANDOM_LOG_ITEM_PIN				1
+#define XFS_RANDOM_BUF_LRU_REF				2
+
+#endif /* __XFS_ERRORTAG_H_ */
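
Per the comment above, a random factor of N arms a tag to fire roughly once in every N evaluations. A sketch of that semantics (the in-kernel test uses the kernel PRNG rather than rand(), and a factor of 0 here stands in for a disarmed tag):

#include <stdbool.h>
#include <stdlib.h>

/*
 * Fire an error tag with probability 1/random_factor: a factor of 1
 * fires on every call, 2 on roughly half, 100 on roughly 1% of calls.
 */
static bool errortag_fires(unsigned int random_factor)
{
	if (random_factor == 0)		/* tag not armed */
		return false;
	return (rand() % random_factor) == 0;
}
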

+ 10 - 27
fs/xfs/libxfs/xfs_format.h

@@ -315,6 +315,11 @@ static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
 	return false;
 }
 
+static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp)
+{
+	return sbp->sb_rblocks > 0;
+}
+
 /*
  * Detect a mismatched features2 field.  Older kernels read/wrote
  * this into the wrong slot, so to be safe we keep them in sync.
@@ -500,12 +505,12 @@ xfs_sb_has_incompat_log_feature(
 /*
  * V5 superblock specific feature checks
  */
-static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
 
-static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
 {
 	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
 }
@@ -518,7 +523,7 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
 		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
 }
 
-static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
@@ -941,7 +946,7 @@ typedef enum xfs_dinode_fmt {
 	XFS_DINODE_FMT_LOCAL,		/* bulk data */
 	XFS_DINODE_FMT_EXTENTS,		/* struct xfs_bmbt_rec */
 	XFS_DINODE_FMT_BTREE,		/* struct xfs_bmdr_block */
-	XFS_DINODE_FMT_UUID		/* uuid_t */
+	XFS_DINODE_FMT_UUID		/* added long ago, but never used */
 } xfs_dinode_fmt_t;
 
 /*
@@ -1142,7 +1147,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * Dquot and dquot block format definitions
  */
 #define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
-#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
+#define XFS_DQUOT_VERSION	(uint8_t)0x01	/* latest version number */
 
 /*
  * This is the main portion of the on-disk representation of quota
@@ -1548,10 +1553,6 @@ typedef struct xfs_bmbt_rec {
 typedef uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
 typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
 
-typedef struct xfs_bmbt_rec_host {
-	uint64_t		l0, l1;
-} xfs_bmbt_rec_host_t;
-
 /*
  * Values and macros for delayed-allocation startblock fields.
  */
@@ -1576,24 +1577,6 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
 	return (xfs_filblks_t)((x) & ~STARTBLOCKMASK);
 }
 
-/*
- * Possible extent states.
- */
-typedef enum {
-	XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
-} xfs_exntst_t;
-
-/*
- * Incore version of above.
- */
-typedef struct xfs_bmbt_irec
-{
-	xfs_fileoff_t	br_startoff;	/* starting file offset */
-	xfs_fsblock_t	br_startblock;	/* starting block number */
-	xfs_filblks_t	br_blockcount;	/* number of blocks */
-	xfs_exntst_t	br_state;	/* extent state */
-} xfs_bmbt_irec_t;
-
 /*
  * Key structure for non-leaf levels of the tree.
  */

+ 77 - 0
fs/xfs/libxfs/xfs_fs.h

@@ -468,6 +468,82 @@ typedef struct xfs_swapext
 #define XFS_FSOP_GOING_FLAGS_LOGFLUSH		0x1	/* flush log but not data */
 #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH		0x2	/* don't flush log nor data */
 
+/* metadata scrubbing */
+struct xfs_scrub_metadata {
+	__u32 sm_type;		/* What to check? */
+	__u32 sm_flags;		/* flags; see below. */
+	__u64 sm_ino;		/* inode number. */
+	__u32 sm_gen;		/* inode generation. */
+	__u32 sm_agno;		/* ag number. */
+	__u64 sm_reserved[5];	/* pad to 64 bytes */
+};
+
+/*
+ * Metadata types and flags for scrub operation.
+ */
+
+/* Scrub subcommands. */
+#define XFS_SCRUB_TYPE_PROBE	0	/* presence test ioctl */
+#define XFS_SCRUB_TYPE_SB	1	/* superblock */
+#define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
+#define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
+#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
+#define XFS_SCRUB_TYPE_BNOBT	5	/* freesp by block btree */
+#define XFS_SCRUB_TYPE_CNTBT	6	/* freesp by length btree */
+#define XFS_SCRUB_TYPE_INOBT	7	/* inode btree */
+#define XFS_SCRUB_TYPE_FINOBT	8	/* free inode btree */
+#define XFS_SCRUB_TYPE_RMAPBT	9	/* reverse mapping btree */
+#define XFS_SCRUB_TYPE_REFCNTBT	10	/* reference count btree */
+#define XFS_SCRUB_TYPE_INODE	11	/* inode record */
+#define XFS_SCRUB_TYPE_BMBTD	12	/* data fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTA	13	/* attr fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTC	14	/* CoW fork block mapping */
+#define XFS_SCRUB_TYPE_DIR	15	/* directory */
+#define XFS_SCRUB_TYPE_XATTR	16	/* extended attribute */
+#define XFS_SCRUB_TYPE_SYMLINK	17	/* symbolic link */
+#define XFS_SCRUB_TYPE_PARENT	18	/* parent pointers */
+#define XFS_SCRUB_TYPE_RTBITMAP	19	/* realtime bitmap */
+#define XFS_SCRUB_TYPE_RTSUM	20	/* realtime summary */
+#define XFS_SCRUB_TYPE_UQUOTA	21	/* user quotas */
+#define XFS_SCRUB_TYPE_GQUOTA	22	/* group quotas */
+#define XFS_SCRUB_TYPE_PQUOTA	23	/* project quotas */
+
+/* Number of scrub subcommands. */
+#define XFS_SCRUB_TYPE_NR	24
+
+/* i: Repair this metadata. */
+#define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
+
+/* o: Metadata object needs repair. */
+#define XFS_SCRUB_OFLAG_CORRUPT		(1 << 1)
+
+/*
+ * o: Metadata object could be optimized.  It's not corrupt, but
+ *    we could improve on it somehow.
+ */
+#define XFS_SCRUB_OFLAG_PREEN		(1 << 2)
+
+/* o: Cross-referencing failed. */
+#define XFS_SCRUB_OFLAG_XFAIL		(1 << 3)
+
+/* o: Metadata object disagrees with cross-referenced metadata. */
+#define XFS_SCRUB_OFLAG_XCORRUPT	(1 << 4)
+
+/* o: Scan was not complete. */
+#define XFS_SCRUB_OFLAG_INCOMPLETE	(1 << 5)
+
+/* o: Metadata object looked funny but isn't corrupt. */
+#define XFS_SCRUB_OFLAG_WARNING		(1 << 6)
+
+#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_IFLAG_REPAIR)
+#define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_OFLAG_CORRUPT | \
+				 XFS_SCRUB_OFLAG_PREEN | \
+				 XFS_SCRUB_OFLAG_XFAIL | \
+				 XFS_SCRUB_OFLAG_XCORRUPT | \
+				 XFS_SCRUB_OFLAG_INCOMPLETE | \
+				 XFS_SCRUB_OFLAG_WARNING)
+#define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+
 /*
  * ioctl limits
  */
@@ -511,6 +587,7 @@ typedef struct xfs_swapext
 #define XFS_IOC_ZERO_RANGE	_IOW ('X', 57, struct xfs_flock64)
 #define XFS_IOC_FREE_EOFBLOCKS	_IOR ('X', 58, struct xfs_fs_eofblocks)
 /*	XFS_IOC_GETFSMAP ------ hoisted 59         */
+#define XFS_IOC_SCRUB_METADATA	_IOWR('X', 60, struct xfs_scrub_metadata)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s

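The struct and flags above are the whole userspace ABI, so driving the new ioctl takes only a few lines. A minimal sketch, assuming the definitions are copied from the xfs_fs.h hunk above and hardcoding four AGs for brevity (real callers would read the AG count from the geometry ioctl):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "xfs_fs.h"	/* struct xfs_scrub_metadata and XFS_SCRUB_* above */

int main(int argc, char **argv)
{
	struct xfs_scrub_metadata	sm;
	int				fd, agno, ret = 0;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);	/* any file on the xfs mount */
	if (fd < 0) {
		perror(argv[1]);
		return 1;
	}

	/* ask the kernel to check the AGF of each AG, here hardcoded to 4 */
	for (agno = 0; agno < 4; agno++) {
		memset(&sm, 0, sizeof(sm));
		sm.sm_type = XFS_SCRUB_TYPE_AGF;
		sm.sm_agno = agno;

		if (ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm) < 0) {
			perror("scrub");
			ret = 1;
			break;
		}
		if (sm.sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
				   XFS_SCRUB_OFLAG_XCORRUPT))
			printf("AGF %d needs repair\n", agno);
		else if (sm.sm_flags & XFS_SCRUB_OFLAG_PREEN)
			printf("AGF %d could be optimized\n", agno);
	}
	close(fd);
	return ret;
}
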
+ 91 - 0
fs/xfs/libxfs/xfs_ialloc.c

@@ -31,6 +31,7 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_cksum.h"
@@ -2664,3 +2665,93 @@ xfs_ialloc_pagi_init(
 		xfs_trans_brelse(tp, bp);
 	return 0;
 }
+
+/* Calculate the first and last possible inode number in an AG. */
+void
+xfs_ialloc_agino_range(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		*first,
+	xfs_agino_t		*last)
+{
+	xfs_agblock_t		bno;
+	xfs_agblock_t		eoag;
+
+	eoag = xfs_ag_block_count(mp, agno);
+
+	/*
+	 * Calculate the first inode, which will be in the first
+	 * cluster-aligned block after the AGFL.
+	 */
+	bno = round_up(XFS_AGFL_BLOCK(mp) + 1,
+			xfs_ialloc_cluster_alignment(mp));
+	*first = XFS_OFFBNO_TO_AGINO(mp, bno, 0);
+
+	/*
+	 * Calculate the last inode, which will be at the end of the
+	 * last (aligned) cluster that can be allocated in the AG.
+	 */
+	bno = round_down(eoag, xfs_ialloc_cluster_alignment(mp));
+	*last = XFS_OFFBNO_TO_AGINO(mp, bno, 0) - 1;
+}
+
+/*
+ * Verify that an AG inode number pointer neither points outside the AG
+ * nor points at static metadata.
+ */
+bool
+xfs_verify_agino(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		agino)
+{
+	xfs_agino_t		first;
+	xfs_agino_t		last;
+
+	xfs_ialloc_agino_range(mp, agno, &first, &last);
+	return agino >= first && agino <= last;
+}
+
+/*
+ * Verify that an FS inode number pointer neither points outside the
+ * filesystem nor points at static AG metadata.
+ */
+bool
+xfs_verify_ino(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
+
+	if (agno >= mp->m_sb.sb_agcount)
+		return false;
+	if (XFS_AGINO_TO_INO(mp, agno, agino) != ino)
+		return false;
+	return xfs_verify_agino(mp, agno, agino);
+}
+
+/* Is this an internal inode number? */
+bool
+xfs_internal_inum(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
+		(xfs_sb_version_hasquota(&mp->m_sb) &&
+		 xfs_is_quota_inode(&mp->m_sb, ino));
+}
+
+/*
+ * Verify that a directory entry's inode number doesn't point at an internal
+ * inode, empty space, or static AG metadata.
+ */
+bool
+xfs_verify_dir_ino(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	if (xfs_internal_inum(mp, ino))
+		return false;
+	return xfs_verify_ino(mp, ino);
+}

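The range computation is easiest to check with concrete numbers. A toy userspace rework of the same arithmetic, assuming a hypothetical geometry (16 inodes per block, 4-block cluster alignment, AGFL at block 3, a 1000-block AG) and using the fact that XFS_OFFBNO_TO_AGINO(mp, bno, 0) reduces to bno times inodes-per-block:

#include <assert.h>

int main(void)
{
	unsigned int agfl_block = 3;	/* XFS_AGFL_BLOCK() */
	unsigned int align = 4;		/* xfs_ialloc_cluster_alignment() */
	unsigned int eoag = 1000;	/* xfs_ag_block_count() */
	unsigned int per_block = 16;	/* inodes per fs block */
	unsigned int first_bno, last_bno;

	/* first cluster-aligned block after the AGFL */
	first_bno = (agfl_block + 1 + align - 1) / align * align;
	assert(first_bno * per_block == 64);		/* first valid agino */

	/* last whole aligned cluster in the AG */
	last_bno = eoag / align * align;
	assert(last_bno * per_block - 1 == 15999);	/* last valid agino */

	/* anything outside [64, 15999] would fail xfs_verify_agino() */
	return 0;
}
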
+ 7 - 0
fs/xfs/libxfs/xfs_ialloc.h

@@ -173,5 +173,12 @@ void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
 		struct xfs_inobt_rec_incore *irec);
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agino_t *first, xfs_agino_t *last);
+bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno,
+		xfs_agino_t agino);
+bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
 
 #endif	/* __XFS_IALLOC_H__ */

+ 1043 - 0
fs/xfs/libxfs/xfs_iext_tree.c

@@ -0,0 +1,1043 @@
+/*
+ * Copyright (c) 2017 Christoph Hellwig.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "xfs.h"
+#include "xfs_format.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+/*
+ * In-core extent record layout:
+ *
+ * +-------+----------------------------+
+ * | 00:53 | all 54 bits of startoff    |
+ * | 54:63 | low 10 bits of startblock  |
+ * +-------+----------------------------+
+ * | 00:20 | all 21 bits of length      |
+ * |    21 | unwritten extent bit       |
+ * | 22:63 | high 42 bits of startblock |
+ * +-------+----------------------------+
+ */
+#define XFS_IEXT_STARTOFF_MASK		xfs_mask64lo(BMBT_STARTOFF_BITLEN)
+#define XFS_IEXT_LENGTH_MASK		xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
+#define XFS_IEXT_STARTBLOCK_MASK	xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
+
+struct xfs_iext_rec {
+	uint64_t			lo;
+	uint64_t			hi;
+};
+
+/*
+ * Given that the length can't be zero, only an empty hi value indicates an
+ * unused record.
+ */
+static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
+{
+	return rec->hi == 0;
+}
+
+static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
+{
+	rec->lo = 0;
+	rec->hi = 0;
+}
+
+static void
+xfs_iext_set(
+	struct xfs_iext_rec	*rec,
+	struct xfs_bmbt_irec	*irec)
+{
+	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
+	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
+	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
+
+	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
+	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
+
+	rec->lo |= (irec->br_startblock << 54);
+	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
+
+	if (irec->br_state == XFS_EXT_UNWRITTEN)
+		rec->hi |= (1 << 21);
+}
+
+static void
+xfs_iext_get(
+	struct xfs_bmbt_irec	*irec,
+	struct xfs_iext_rec	*rec)
+{
+	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
+	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
+
+	irec->br_startblock = rec->lo >> 54;
+	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
+
+	if (rec->hi & (1 << 21))
+		irec->br_state = XFS_EXT_UNWRITTEN;
+	else
+		irec->br_state = XFS_EXT_NORM;
+}
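
The packing above is fiddly, so here is the same arithmetic as a self-contained userspace round-trip; the masks are re-derived locally rather than taken from the kernel headers, and the sample values are arbitrary:

#include <assert.h>
#include <stdint.h>

/* same split as above: 54-bit startoff, 21-bit length, 52-bit startblock */
int main(void)
{
	uint64_t startoff = 100, startblock = 0x12345, blockcount = 8;
	uint64_t lo, hi;

	/* pack: bits >= 10 of startblock fall off the top of lo on purpose */
	lo = (startoff & ((1ULL << 54) - 1)) | (startblock << 54);
	hi = (blockcount & ((1ULL << 21) - 1)) |
	     (1ULL << 21) |			/* XFS_EXT_UNWRITTEN */
	     ((startblock >> 10) << 22);	/* high 42 bits */

	/* unpack and verify nothing was lost */
	assert((lo & ((1ULL << 54) - 1)) == startoff);
	assert((hi & ((1ULL << 21) - 1)) == blockcount);
	assert(((lo >> 54) | (hi >> 22 << 10)) == startblock);
	assert(hi & (1ULL << 21));		/* unwritten bit survived */
	return 0;
}
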
+
+enum {
+	NODE_SIZE	= 256,
+	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
+	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
+				sizeof(struct xfs_iext_rec),
+};
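
On a 64-bit build the constants above work out to 16 key/pointer pairs per inner node and 15 records per leaf (on 32-bit the smaller pointer size changes the math). A quick re-check of that arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const int node_size = 256;

	/* inner node: an 8-byte key plus an 8-byte pointer per entry */
	assert(node_size / (sizeof(uint64_t) + sizeof(void *)) == 16);

	/* leaf: 16-byte records, minus room for the prev/next pointers */
	assert((node_size - 2 * sizeof(void *)) / 16 == 15);
	return 0;
}
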
+
+/*
+ * In-core extent btree block layout:
+ *
+ * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
+ *
+ * The leaf blocks are made up of %RECS_PER_LEAF extent records, each of which
+ * contains the startoffset, blockcount, startblock and unwritten extent flag
+ * (see above for the exact format), followed by pointers to the previous and
+ * next leaf blocks (if there are any).
+ *
+ * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
+ * by an equal number of pointers to the btree blocks at the next lower level.
+ *
+ *		+-------+-------+-------+-------+-------+----------+----------+
+ * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
+ *		+-------+-------+-------+-------+-------+----------+----------+
+ *
+ *		+-------+-------+-------+-------+-------+-------+------+-------+
+ * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
+ *		+-------+-------+-------+-------+-------+-------+------+-------+
+ */
+struct xfs_iext_node {
+	uint64_t		keys[KEYS_PER_NODE];
+#define XFS_IEXT_KEY_INVALID	(1ULL << 63)
+	void			*ptrs[KEYS_PER_NODE];
+};
+
+struct xfs_iext_leaf {
+	struct xfs_iext_rec	recs[RECS_PER_LEAF];
+	struct xfs_iext_leaf	*prev;
+	struct xfs_iext_leaf	*next;
+};
+
+inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
+{
+	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
+}
+
+static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
+{
+	if (ifp->if_height == 1)
+		return xfs_iext_count(ifp);
+	return RECS_PER_LEAF;
+}
+
+static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
+{
+	return &cur->leaf->recs[cur->pos];
+}
+
+static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur)
+{
+	if (!cur->leaf)
+		return false;
+	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
+		return false;
+	if (xfs_iext_rec_is_empty(cur_rec(cur)))
+		return false;
+	return true;
+}
+
+static void *
+xfs_iext_find_first_leaf(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > 1; height--) {
+		node = node->ptrs[0];
+		ASSERT(node);
+	}
+
+	return node;
+}
+
+static void *
+xfs_iext_find_last_leaf(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > 1; height--) {
+		for (i = 1; i < KEYS_PER_NODE; i++)
+			if (!node->ptrs[i])
+				break;
+		node = node->ptrs[i - 1];
+		ASSERT(node);
+	}
+
+	return node;
+}
+
+void
+xfs_iext_first(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	cur->pos = 0;
+	cur->leaf = xfs_iext_find_first_leaf(ifp);
+}
+
+void
+xfs_iext_last(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	int			i;
+
+	cur->leaf = xfs_iext_find_last_leaf(ifp);
+	if (!cur->leaf) {
+		cur->pos = 0;
+		return;
+	}
+
+	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
+		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
+			break;
+	}
+	cur->pos = i - 1;
+}
+
+void
+xfs_iext_next(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	if (!cur->leaf) {
+		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
+		xfs_iext_first(ifp, cur);
+		return;
+	}
+
+	ASSERT(cur->pos >= 0);
+	ASSERT(cur->pos < xfs_iext_max_recs(ifp));
+
+	cur->pos++;
+	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
+	    cur->leaf->next) {
+		cur->leaf = cur->leaf->next;
+		cur->pos = 0;
+	}
+}
+
+void
+xfs_iext_prev(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	if (!cur->leaf) {
+		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
+		xfs_iext_last(ifp, cur);
+		return;
+	}
+
+	ASSERT(cur->pos >= 0);
+	ASSERT(cur->pos <= RECS_PER_LEAF);
+
+recurse:
+	do {
+		cur->pos--;
+		if (xfs_iext_valid(ifp, cur))
+			return;
+	} while (cur->pos > 0);
+
+	if (ifp->if_height > 1 && cur->leaf->prev) {
+		cur->leaf = cur->leaf->prev;
+		cur->pos = RECS_PER_LEAF;
+		goto recurse;
+	}
+}
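
Together with xfs_iext_get_extent() defined further down, these movement helpers form the canonical walk over a fork, which the series wraps in a for_each_xfs_iext() macro. A fragment, assuming ifp points at a fork with its extents loaded:

	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;

	for (xfs_iext_first(ifp, &icur);
	     xfs_iext_get_extent(ifp, &icur, &got);
	     xfs_iext_next(ifp, &icur)) {
		/* got describes one mapped extent:
		 * [br_startoff, br_startoff + br_blockcount) */
	}
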
+
+static inline int
+xfs_iext_key_cmp(
+	struct xfs_iext_node	*node,
+	int			n,
+	xfs_fileoff_t		offset)
+{
+	if (node->keys[n] > offset)
+		return 1;
+	if (node->keys[n] < offset)
+		return -1;
+	return 0;
+}
+
+static inline int
+xfs_iext_rec_cmp(
+	struct xfs_iext_rec	*rec,
+	xfs_fileoff_t		offset)
+{
+	uint64_t		rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
+	u32			rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
+
+	if (rec_offset > offset)
+		return 1;
+	if (rec_offset + rec_len <= offset)
+		return -1;
+	return 0;
+}
+
+static void *
+xfs_iext_find_level(
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		offset,
+	int			level)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	if (!ifp->if_height)
+		return NULL;
+
+	for (height = ifp->if_height; height > level; height--) {
+		for (i = 1; i < KEYS_PER_NODE; i++)
+			if (xfs_iext_key_cmp(node, i, offset) > 0)
+				break;
+
+		node = node->ptrs[i - 1];
+		if (!node)
+			break;
+	}
+
+	return node;
+}
+
+static int
+xfs_iext_node_pos(
+	struct xfs_iext_node	*node,
+	xfs_fileoff_t		offset)
+{
+	int			i;
+
+	for (i = 1; i < KEYS_PER_NODE; i++) {
+		if (xfs_iext_key_cmp(node, i, offset) > 0)
+			break;
+	}
+
+	return i - 1;
+}
+
+static int
+xfs_iext_node_insert_pos(
+	struct xfs_iext_node	*node,
+	xfs_fileoff_t		offset)
+{
+	int			i;
+
+	for (i = 0; i < KEYS_PER_NODE; i++) {
+		if (xfs_iext_key_cmp(node, i, offset) > 0)
+			return i;
+	}
+
+	return KEYS_PER_NODE;
+}
+
+static int
+xfs_iext_node_nr_entries(
+	struct xfs_iext_node	*node,
+	int			start)
+{
+	int			i;
+
+	for (i = start; i < KEYS_PER_NODE; i++) {
+		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
+			break;
+	}
+
+	return i;
+}
+
+static int
+xfs_iext_leaf_nr_entries(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_leaf	*leaf,
+	int			start)
+{
+	int			i;
+
+	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
+		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
+			break;
+	}
+
+	return i;
+}
+
+static inline uint64_t
+xfs_iext_leaf_key(
+	struct xfs_iext_leaf	*leaf,
+	int			n)
+{
+	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
+}
+
+static void
+xfs_iext_grow(
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_iext_node	*node = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	int			i;
+
+	if (ifp->if_height == 1) {
+		struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
+
+		node->keys[0] = xfs_iext_leaf_key(prev, 0);
+		node->ptrs[0] = prev;
+	} else  {
+		struct xfs_iext_node *prev = ifp->if_u1.if_root;
+
+		ASSERT(ifp->if_height > 1);
+
+		node->keys[0] = prev->keys[0];
+		node->ptrs[0] = prev;
+	}
+
+	for (i = 1; i < KEYS_PER_NODE; i++)
+		node->keys[i] = XFS_IEXT_KEY_INVALID;
+
+	ifp->if_u1.if_root = node;
+	ifp->if_height++;
+}
+
+static void
+xfs_iext_update_node(
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		old_offset,
+	xfs_fileoff_t		new_offset,
+	int			level,
+	void			*ptr)
+{
+	struct xfs_iext_node	*node = ifp->if_u1.if_root;
+	int			height, i;
+
+	for (height = ifp->if_height; height > level; height--) {
+		for (i = 0; i < KEYS_PER_NODE; i++) {
+			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
+				break;
+			if (node->keys[i] == old_offset)
+				node->keys[i] = new_offset;
+		}
+		node = node->ptrs[i - 1];
+		ASSERT(node);
+	}
+
+	ASSERT(node == ptr);
+}
+
+static struct xfs_iext_node *
+xfs_iext_split_node(
+	struct xfs_iext_node	**nodep,
+	int			*pos,
+	int			*nr_entries)
+{
+	struct xfs_iext_node	*node = *nodep;
+	struct xfs_iext_node	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	const int		nr_move = KEYS_PER_NODE / 2;
+	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
+	int			i = 0;
+
+	/* for sequential append operations just spill over into the new node */
+	if (*pos == KEYS_PER_NODE) {
+		*nodep = new;
+		*pos = 0;
+		*nr_entries = 0;
+		goto done;
+	}
+
+	for (i = 0; i < nr_move; i++) {
+		new->keys[i] = node->keys[nr_keep + i];
+		new->ptrs[i] = node->ptrs[nr_keep + i];
+
+		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
+		node->ptrs[nr_keep + i] = NULL;
+	}
+
+	if (*pos >= nr_keep) {
+		*nodep = new;
+		*pos -= nr_keep;
+		*nr_entries = nr_move;
+	} else {
+		*nr_entries = nr_keep;
+	}
+done:
+	for (; i < KEYS_PER_NODE; i++)
+		new->keys[i] = XFS_IEXT_KEY_INVALID;
+	return new;
+}
+
+static void
+xfs_iext_insert_node(
+	struct xfs_ifork	*ifp,
+	uint64_t		offset,
+	void			*ptr,
+	int			level)
+{
+	struct xfs_iext_node	*node, *new;
+	int			i, pos, nr_entries;
+
+again:
+	if (ifp->if_height < level)
+		xfs_iext_grow(ifp);
+
+	new = NULL;
+	node = xfs_iext_find_level(ifp, offset, level);
+	pos = xfs_iext_node_insert_pos(node, offset);
+	nr_entries = xfs_iext_node_nr_entries(node, pos);
+
+	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
+	ASSERT(nr_entries <= KEYS_PER_NODE);
+
+	if (nr_entries == KEYS_PER_NODE)
+		new = xfs_iext_split_node(&node, &pos, &nr_entries);
+
+	/*
+	 * Update the keys in higher levels if the first entry changes
+	 * in an existing node.
+	 */
+	if (node != new && pos == 0 && nr_entries > 0)
+		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
+
+	for (i = nr_entries; i > pos; i--) {
+		node->keys[i] = node->keys[i - 1];
+		node->ptrs[i] = node->ptrs[i - 1];
+	}
+	node->keys[pos] = offset;
+	node->ptrs[pos] = ptr;
+
+	if (new) {
+		offset = new->keys[0];
+		ptr = new;
+		level++;
+		goto again;
+	}
+}
+
+static struct xfs_iext_leaf *
+xfs_iext_split_leaf(
+	struct xfs_iext_cursor	*cur,
+	int			*nr_entries)
+{
+	struct xfs_iext_leaf	*leaf = cur->leaf;
+	struct xfs_iext_leaf	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+	const int		nr_move = RECS_PER_LEAF / 2;
+	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
+	int			i;
+
+	/* for sequential append operations just spill over into the new node */
+	if (cur->pos == RECS_PER_LEAF) {
+		cur->leaf = new;
+		cur->pos = 0;
+		*nr_entries = 0;
+		goto done;
+	}
+
+	for (i = 0; i < nr_move; i++) {
+		new->recs[i] = leaf->recs[nr_keep + i];
+		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
+	}
+
+	if (cur->pos >= nr_keep) {
+		cur->leaf = new;
+		cur->pos -= nr_keep;
+		*nr_entries = nr_move;
+	} else {
+		*nr_entries = nr_keep;
+	}
+done:
+	if (leaf->next)
+		leaf->next->prev = new;
+	new->next = leaf->next;
+	new->prev = leaf;
+	leaf->next = new;
+	return new;
+}
+
+static void
+xfs_iext_alloc_root(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	ASSERT(ifp->if_bytes == 0);
+
+	ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
+	ifp->if_height = 1;
+
+	/* now that we have a node, step into it */
+	cur->leaf = ifp->if_u1.if_root;
+	cur->pos = 0;
+}
+
+static void
+xfs_iext_realloc_root(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur)
+{
+	size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
+	void *new;
+
+	/* account for the prev/next pointers */
+	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
+		new_size = NODE_SIZE;
+
+	new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
+	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
+	ifp->if_u1.if_root = new;
+	cur->leaf = new;
+}
+
+void
+xfs_iext_insert(
+	struct xfs_inode	*ip,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*irec,
+	int			state)
+{
+	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
+	xfs_fileoff_t		offset = irec->br_startoff;
+	struct xfs_iext_leaf	*new = NULL;
+	int			nr_entries, i;
+
+	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
+	if (ifp->if_height == 0)
+		xfs_iext_alloc_root(ifp, cur);
+	else if (ifp->if_height == 1)
+		xfs_iext_realloc_root(ifp, cur);
+
+	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
+	ASSERT(nr_entries <= RECS_PER_LEAF);
+	ASSERT(cur->pos >= nr_entries ||
+	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
+
+	if (nr_entries == RECS_PER_LEAF)
+		new = xfs_iext_split_leaf(cur, &nr_entries);
+
+	/*
+	 * Update the keys in higher levels if the first entry changes
+	 * in an existing node.
+	 */
+	if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
+		xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
+				offset, 1, cur->leaf);
+	}
+
+	for (i = nr_entries; i > cur->pos; i--)
+		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
+	xfs_iext_set(cur_rec(cur), irec);
+	ifp->if_bytes += sizeof(struct xfs_iext_rec);
+
+	if (new)
+		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
+}
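
Callers are not expected to position the cursor by hand; the usual pattern elsewhere in the series is a lookup to place the cursor, then the insert. A fragment with illustrative values (off, bno, len and state are assumed to come from the caller):

	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got, irec;

	irec.br_startoff = off;		/* filled in by the caller */
	irec.br_startblock = bno;
	irec.br_blockcount = len;
	irec.br_state = XFS_EXT_NORM;

	/* position icur at (or just after) the new extent's offset */
	xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, &got);

	/* splice the record in; leaf splits and root growth are automatic */
	xfs_iext_insert(ip, &icur, &irec, state);
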
+
+static struct xfs_iext_node *
+xfs_iext_rebalance_node(
+	struct xfs_iext_node	*parent,
+	int			*pos,
+	struct xfs_iext_node	*node,
+	int			nr_entries)
+{
+	/*
+	 * If the neighbouring nodes are completely full, or have different
+	 * parents, we might never be able to merge our node, and will only
+	 * delete it once the number of entries hits zero.
+	 */
+	if (nr_entries == 0)
+		return node;
+
+	if (*pos > 0) {
+		struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
+		int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
+
+		if (nr_prev + nr_entries <= KEYS_PER_NODE) {
+			for (i = 0; i < nr_entries; i++) {
+				prev->keys[nr_prev + i] = node->keys[i];
+				prev->ptrs[nr_prev + i] = node->ptrs[i];
+			}
+			return node;
+		}
+	}
+
+	if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
+		struct xfs_iext_node *next = parent->ptrs[*pos + 1];
+		int nr_next = xfs_iext_node_nr_entries(next, 0), i;
+
+		if (nr_entries + nr_next <= KEYS_PER_NODE) {
+			/*
+			 * Merge the next node into this node so that we don't
+			 * have to do an additional update of the keys in the
+			 * higher levels.
+			 */
+			for (i = 0; i < nr_next; i++) {
+				node->keys[nr_entries + i] = next->keys[i];
+				node->ptrs[nr_entries + i] = next->ptrs[i];
+			}
+
+			++*pos;
+			return next;
+		}
+	}
+
+	return NULL;
+}
+
+static void
+xfs_iext_remove_node(
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		offset,
+	void			*victim)
+{
+	struct xfs_iext_node	*node, *parent;
+	int			level = 2, pos, nr_entries, i;
+
+	ASSERT(level <= ifp->if_height);
+	node = xfs_iext_find_level(ifp, offset, level);
+	pos = xfs_iext_node_pos(node, offset);
+again:
+	ASSERT(node->ptrs[pos]);
+	ASSERT(node->ptrs[pos] == victim);
+	kmem_free(victim);
+
+	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
+	offset = node->keys[0];
+	for (i = pos; i < nr_entries; i++) {
+		node->keys[i] = node->keys[i + 1];
+		node->ptrs[i] = node->ptrs[i + 1];
+	}
+	node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
+	node->ptrs[nr_entries] = NULL;
+
+	if (pos == 0 && nr_entries > 0) {
+		xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
+		offset = node->keys[0];
+	}
+
+	if (nr_entries >= KEYS_PER_NODE / 2)
+		return;
+
+	if (level < ifp->if_height) {
+		/*
+		 * If we aren't at the root yet try to find a neighbour node to
+		 * merge with (or delete the node if it is empty), and then
+		 * recurse up to the next level.
+		 */
+		level++;
+		parent = xfs_iext_find_level(ifp, offset, level);
+		pos = xfs_iext_node_pos(parent, offset);
+
+		ASSERT(pos != KEYS_PER_NODE);
+		ASSERT(parent->ptrs[pos] == node);
+
+		node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
+		if (node) {
+			victim = node;
+			node = parent;
+			goto again;
+		}
+	} else if (nr_entries == 1) {
+		/*
+		 * If we are at the root and only one entry is left we can just
+		 * free this node and update the root pointer.
+		 */
+		ASSERT(node == ifp->if_u1.if_root);
+		ifp->if_u1.if_root = node->ptrs[0];
+		ifp->if_height--;
+		kmem_free(node);
+	}
+}
+
+static void
+xfs_iext_rebalance_leaf(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_iext_leaf	*leaf,
+	xfs_fileoff_t		offset,
+	int			nr_entries)
+{
+	/*
+	 * If the neighbouring nodes are completely full we might never be able
+	 * to merge our node, and will only delete it once the number of
+	 * entries hits zero.
+	 */
+	if (nr_entries == 0)
+		goto remove_node;
+
+	if (leaf->prev) {
+		int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
+
+		if (nr_prev + nr_entries <= RECS_PER_LEAF) {
+			for (i = 0; i < nr_entries; i++)
+				leaf->prev->recs[nr_prev + i] = leaf->recs[i];
+
+			if (cur->leaf == leaf) {
+				cur->leaf = leaf->prev;
+				cur->pos += nr_prev;
+			}
+			goto remove_node;
+		}
+	}
+
+	if (leaf->next) {
+		int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
+
+		if (nr_entries + nr_next <= RECS_PER_LEAF) {
+			/*
+			 * Merge the next node into this node so that we don't
+			 * have to do an additional update of the keys in the
+			 * higher levels.
+			 */
+			for (i = 0; i < nr_next; i++) {
+				leaf->recs[nr_entries + i] =
+					leaf->next->recs[i];
+			}
+
+			if (cur->leaf == leaf->next) {
+				cur->leaf = leaf;
+				cur->pos += nr_entries;
+			}
+
+			offset = xfs_iext_leaf_key(leaf->next, 0);
+			leaf = leaf->next;
+			goto remove_node;
+		}
+	}
+
+	return;
+remove_node:
+	if (leaf->prev)
+		leaf->prev->next = leaf->next;
+	if (leaf->next)
+		leaf->next->prev = leaf->prev;
+	xfs_iext_remove_node(ifp, offset, leaf);
+}
+
+static void
+xfs_iext_free_last_leaf(
+	struct xfs_ifork	*ifp)
+{
+	ifp->if_height--;
+	kmem_free(ifp->if_u1.if_root);
+	ifp->if_u1.if_root = NULL;
+}
+
+void
+xfs_iext_remove(
+	struct xfs_inode	*ip,
+	struct xfs_iext_cursor	*cur,
+	int			state)
+{
+	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
+	struct xfs_iext_leaf	*leaf = cur->leaf;
+	xfs_fileoff_t		offset = xfs_iext_leaf_key(leaf, 0);
+	int			i, nr_entries;
+
+	trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
+
+	ASSERT(ifp->if_height > 0);
+	ASSERT(ifp->if_u1.if_root != NULL);
+	ASSERT(xfs_iext_valid(ifp, cur));
+
+	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
+	for (i = cur->pos; i < nr_entries; i++)
+		leaf->recs[i] = leaf->recs[i + 1];
+	xfs_iext_rec_clear(&leaf->recs[nr_entries]);
+	ifp->if_bytes -= sizeof(struct xfs_iext_rec);
+
+	if (cur->pos == 0 && nr_entries > 0) {
+		xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
+				leaf);
+		offset = xfs_iext_leaf_key(leaf, 0);
+	} else if (cur->pos == nr_entries) {
+		if (ifp->if_height > 1 && leaf->next)
+			cur->leaf = leaf->next;
+		else
+			cur->leaf = NULL;
+		cur->pos = 0;
+	}
+
+	if (nr_entries >= RECS_PER_LEAF / 2)
+		return;
+
+	if (ifp->if_height > 1)
+		xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
+	else if (nr_entries == 0)
+		xfs_iext_free_last_leaf(ifp);
+}
+
+/*
+ * Lookup the extent covering bno.
+ *
+ * If there is an extent covering bno return true, store the expanded extent
+ * structure in *gotp, and set the extent cursor in *cur.
+ * If there is no extent covering bno, but there is an extent after it (i.e.
+ * bno lies in a hole) return that extent in *gotp and its cursor in *cur
+ * instead.
+ * If bno is beyond the last extent return false, and leave the cursor
+ * invalid.
+ */
+bool
+xfs_iext_lookup_extent(
+	struct xfs_inode	*ip,
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		offset,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*gotp)
+{
+	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
+
+	cur->leaf = xfs_iext_find_level(ifp, offset, 1);
+	if (!cur->leaf) {
+		cur->pos = 0;
+		return false;
+	}
+
+	for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
+		struct xfs_iext_rec *rec = cur_rec(cur);
+
+		if (xfs_iext_rec_is_empty(rec))
+			break;
+		if (xfs_iext_rec_cmp(rec, offset) >= 0)
+			goto found;
+	}
+
+	/* Try looking in the next node for an entry > offset */
+	if (ifp->if_height == 1 || !cur->leaf->next)
+		return false;
+	cur->leaf = cur->leaf->next;
+	cur->pos = 0;
+	if (!xfs_iext_valid(ifp, cur))
+		return false;
+found:
+	xfs_iext_get(gotp, cur_rec(cur));
+	return true;
+}
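
The three documented outcomes map onto a simple caller-side test; a fragment, with bno assumed to be the file offset being mapped:

	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/* bno is beyond the last extent: nothing mapped after it */
	} else if (got.br_startoff > bno) {
		/* bno sits in a hole that ends where got begins */
	} else {
		/* got covers bno */
	}
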
+
+/*
+ * Return the last extent before end, and if this extent doesn't cover
+ * end, update end to the end of the extent.
+ */
+bool
+xfs_iext_lookup_extent_before(
+	struct xfs_inode	*ip,
+	struct xfs_ifork	*ifp,
+	xfs_fileoff_t		*end,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*gotp)
+{
+	/* could be optimized to not even look up the next extent on a match... */
+	if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
+	    gotp->br_startoff <= *end - 1)
+		return true;
+	if (!xfs_iext_prev_extent(ifp, cur, gotp))
+		return false;
+	*end = gotp->br_startoff + gotp->br_blockcount;
+	return true;
+}
+
+void
+xfs_iext_update_extent(
+	struct xfs_inode	*ip,
+	int			state,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*new)
+{
+	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
+
+	if (cur->pos == 0) {
+		struct xfs_bmbt_irec	old;
+
+		xfs_iext_get(&old, cur_rec(cur));
+		if (new->br_startoff != old.br_startoff) {
+			xfs_iext_update_node(ifp, old.br_startoff,
+					new->br_startoff, 1, cur->leaf);
+		}
+	}
+
+	trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
+	xfs_iext_set(cur_rec(cur), new);
+	trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
+}
+
+/*
+ * Return true if the cursor points at an extent and fill in the extent
+ * structure pointed to by gotp, otherwise return false.
+ */
+bool
+xfs_iext_get_extent(
+	struct xfs_ifork	*ifp,
+	struct xfs_iext_cursor	*cur,
+	struct xfs_bmbt_irec	*gotp)
+{
+	if (!xfs_iext_valid(ifp, cur))
+		return false;
+	xfs_iext_get(gotp, cur_rec(cur));
+	return true;
+}
+
+/*
+ * This is a recursive function, so we need to be extremely careful with
+ * stack usage.
+ */
+static void
+xfs_iext_destroy_node(
+	struct xfs_iext_node	*node,
+	int			level)
+{
+	int			i;
+
+	if (level > 1) {
+		for (i = 0; i < KEYS_PER_NODE; i++) {
+			if (node->keys[i] == XFS_IEXT_KEY_INVALID)
+				break;
+			xfs_iext_destroy_node(node->ptrs[i], level - 1);
+		}
+	}
+
+	kmem_free(node);
+}
+
+void
+xfs_iext_destroy(
+	struct xfs_ifork	*ifp)
+{
+	xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
+
+	ifp->if_bytes = 0;
+	ifp->if_height = 0;
+	ifp->if_u1.if_root = NULL;
+}

+ 1 - 0
fs/xfs/libxfs/xfs_inode_buf.c

@@ -24,6 +24,7 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_inode.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_cksum.h"
 #include "xfs_icache.h"

+ 66 - 1267
fs/xfs/libxfs/xfs_inode_fork.c

@@ -42,21 +42,27 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
 
+static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
+{
+	return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+}
+
 /*
- * Move inode type and inode format specific information from the
- * on-disk inode to the in-core inode.  For fifos, devs, and sockets
- * this means set if_rdev to the proper value.  For files, directories,
- * and symlinks this means to bring in the in-line data or extent
- * pointers.  For a file in B-tree format, only the root is immediately
- * brought in-core.  The rest will be in-lined in if_extents when it
- * is first referenced (see xfs_iread_extents()).
+ * Copy inode type and data and attr format specific information from the
+ * on-disk inode to the in-core inode and fork structures.  For fifos, devices,
+ * and sockets this means setting i_rdev to the proper value.  For files,
+ * directories, and symlinks this means bringing in the in-line data or extent
+ * pointers as well as the attribute fork.  For a fork in B-tree format, only
+ * the root is immediately brought in-core.  The rest will be read in later when
+ * first referenced (see xfs_iread_extents()).
  */
 int
 xfs_iformat_fork(
-	xfs_inode_t		*ip,
-	xfs_dinode_t		*dip)
+	struct xfs_inode	*ip,
+	struct xfs_dinode	*dip)
 {
-	xfs_attr_shortform_t	*atp;
+	struct inode		*inode = VFS_I(ip);
+	struct xfs_attr_shortform *atp;
 	int			size;
 	int			error = 0;
 	xfs_fsize_t             di_size;
@@ -95,8 +101,7 @@ xfs_iformat_fork(
 		return -EFSCORRUPTED;
 	}
 
-	if (unlikely(xfs_is_reflink_inode(ip) &&
-	    (VFS_I(ip)->i_mode & S_IFMT) != S_IFREG)) {
+	if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
 		xfs_warn(ip->i_mount,
 			"corrupt dinode %llu, wrong file type for reflink.",
 			ip->i_ino);
@@ -115,7 +120,7 @@ xfs_iformat_fork(
 		return -EFSCORRUPTED;
 	}
 
-	switch (VFS_I(ip)->i_mode & S_IFMT) {
+	switch (inode->i_mode & S_IFMT) {
 	case S_IFIFO:
 	case S_IFCHR:
 	case S_IFBLK:
@@ -126,7 +131,7 @@ xfs_iformat_fork(
 			return -EFSCORRUPTED;
 		}
 		ip->i_d.di_size = 0;
-		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
+		inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
 		break;
 
 	case S_IFREG:
@@ -184,8 +189,7 @@ xfs_iformat_fork(
 		return error;
 
 	/* Check inline dir contents. */
-	if (S_ISDIR(VFS_I(ip)->i_mode) &&
-	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
+	if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
 		error = xfs_dir2_sf_verify(ip);
 		if (error) {
 			xfs_idestroy_fork(ip, XFS_DATA_FORK);
@@ -265,19 +269,14 @@ xfs_init_local_fork(
 	if (zero_terminate)
 		mem_size++;
 
-	if (size == 0)
-		ifp->if_u1.if_data = NULL;
-	else if (mem_size <= sizeof(ifp->if_u2.if_inline_data))
-		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-	else {
+	if (size) {
 		real_size = roundup(mem_size, 4);
 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
-	}
-
-	if (size) {
 		memcpy(ifp->if_u1.if_data, data, size);
 		if (zero_terminate)
 			ifp->if_u1.if_data[size] = '\0';
+	} else {
+		ifp->if_u1.if_data = NULL;
 	}
 
 	ifp->if_bytes = size;
@@ -288,13 +287,6 @@ xfs_init_local_fork(
 
 /*
  * The file is in-lined in the on-disk inode.
- * If it fits into if_inline_data, then copy
- * it there, otherwise allocate a buffer for it
- * and copy the data there.  Either way, set
- * if_data to point at the data.
- * If we allocate a buffer for the data, make
- * sure that its size is a multiple of 4 and
- * record the real size in i_real_bytes.
  */
 STATIC int
 xfs_iformat_local(
@@ -324,9 +316,7 @@ xfs_iformat_local(
 
 /*
  * The file consists of a set of extents all of which fit into the on-disk
- * inode.  If there are few enough extents to fit into the if_inline_ext, then
- * copy them there.  Otherwise allocate a buffer for them and copy them into it.
- * Either way, set if_extents to point at the extents.
+ * inode.
  */
 STATIC int
 xfs_iformat_extents(
@@ -336,9 +326,12 @@ xfs_iformat_extents(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	int			state = xfs_bmap_fork_to_state(whichfork);
 	int			nex = XFS_DFORK_NEXTENTS(dip, whichfork);
 	int			size = nex * sizeof(xfs_bmbt_rec_t);
+	struct xfs_iext_cursor	icur;
 	struct xfs_bmbt_rec	*dp;
+	struct xfs_bmbt_irec	new;
 	int			i;
 
 	/*
@@ -354,27 +347,25 @@ xfs_iformat_extents(
 	}
 
 	ifp->if_real_bytes = 0;
-	if (nex == 0)
-		ifp->if_u1.if_extents = NULL;
-	else if (nex <= XFS_INLINE_EXTS)
-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	else
-		xfs_iext_add(ifp, 0, nex);
-
-	ifp->if_bytes = size;
+	ifp->if_bytes = 0;
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
 	if (size) {
 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
+
+		xfs_iext_first(ifp, &icur);
 		for (i = 0; i < nex; i++, dp++) {
-			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-			ep->l0 = get_unaligned_be64(&dp->l0);
-			ep->l1 = get_unaligned_be64(&dp->l1);
-			if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) {
+			xfs_bmbt_disk_get_all(dp, &new);
+			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
 				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
 						 XFS_ERRLEVEL_LOW, mp);
 				return -EFSCORRUPTED;
 			}
+
+			xfs_iext_insert(ip, &icur, &new, state);
+			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
+			xfs_iext_next(ifp, &icur);
 		}
-		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
 	}
 	ifp->if_flags |= XFS_IFEXTENTS;
 	return 0;
@@ -440,46 +431,13 @@ xfs_iformat_btree(
 	ifp->if_flags &= ~XFS_IFEXTENTS;
 	ifp->if_flags |= XFS_IFBROOT;
 
+	ifp->if_real_bytes = 0;
+	ifp->if_bytes = 0;
+	ifp->if_u1.if_root = NULL;
+	ifp->if_height = 0;
 	return 0;
 }
 
-/*
- * Read in extents from a btree-format inode.
- * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
- */
-int
-xfs_iread_extents(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	int		whichfork)
-{
-	int		error;
-	xfs_ifork_t	*ifp;
-	xfs_extnum_t	nextents;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
-		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
-				 ip->i_mount);
-		return -EFSCORRUPTED;
-	}
-	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-
-	/*
-	 * We know that the size is valid (it's checked in iformat_btree)
-	 */
-	ifp->if_bytes = ifp->if_real_bytes = 0;
-	xfs_iext_add(ifp, 0, nextents);
-	error = xfs_bmap_read_extents(tp, ip, whichfork);
-	if (error) {
-		xfs_iext_destroy(ifp);
-		return error;
-	}
-	ifp->if_flags |= XFS_IFEXTENTS;
-	return 0;
-}
 /*
  * Reallocate the space for if_broot based on the number of records
  * being added or deleted as indicated in rec_diff.  Move the records
@@ -644,26 +602,9 @@ xfs_idata_realloc(
 	ASSERT(new_size >= 0);
 
 	if (new_size == 0) {
-		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			kmem_free(ifp->if_u1.if_data);
-		}
+		kmem_free(ifp->if_u1.if_data);
 		ifp->if_u1.if_data = NULL;
 		real_size = 0;
-	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
-		/*
-		 * If the valid extents/data can fit in if_inline_ext/data,
-		 * copy them from the malloc'd vector and free it.
-		 */
-		if (ifp->if_u1.if_data == NULL) {
-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			ASSERT(ifp->if_real_bytes != 0);
-			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
-			      new_size);
-			kmem_free(ifp->if_u1.if_data);
-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-		}
-		real_size = 0;
 	} else {
 		/*
 		 * Stuck with malloc/realloc.
@@ -677,7 +618,7 @@ xfs_idata_realloc(
 			ASSERT(ifp->if_real_bytes == 0);
 			ifp->if_u1.if_data = kmem_alloc(real_size,
 							KM_SLEEP | KM_NOFS);
-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+		} else {
 			/*
 			 * Only do the realloc if the underlying size
 			 * is really changing.
@@ -688,12 +629,6 @@ xfs_idata_realloc(
 							real_size,
 							KM_SLEEP | KM_NOFS);
 			}
-		} else {
-			ASSERT(ifp->if_real_bytes == 0);
-			ifp->if_u1.if_data = kmem_alloc(real_size,
-							KM_SLEEP | KM_NOFS);
-			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
-				ifp->if_bytes);
 		}
 	}
 	ifp->if_real_bytes = real_size;
@@ -721,23 +656,18 @@ xfs_idestroy_fork(
 	 * so check and free it up if we do.
 	 */
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
-		    (ifp->if_u1.if_data != NULL)) {
+		if (ifp->if_u1.if_data != NULL) {
 			ASSERT(ifp->if_real_bytes != 0);
 			kmem_free(ifp->if_u1.if_data);
 			ifp->if_u1.if_data = NULL;
 			ifp->if_real_bytes = 0;
 		}
-	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
-		   ((ifp->if_flags & XFS_IFEXTIREC) ||
-		    ((ifp->if_u1.if_extents != NULL) &&
-		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
-		ASSERT(ifp->if_real_bytes != 0);
+	} else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
 		xfs_iext_destroy(ifp);
 	}
-	ASSERT(ifp->if_u1.if_extents == NULL ||
-	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
+
 	ASSERT(ifp->if_real_bytes == 0);
+
 	if (whichfork == XFS_ATTR_FORK) {
 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 		ip->i_afp = NULL;
@@ -747,19 +677,9 @@ xfs_idestroy_fork(
 	}
 }
 
-/* Count number of incore extents based on if_bytes */
-xfs_extnum_t
-xfs_iext_count(struct xfs_ifork *ifp)
-{
-	return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-}
-
 /*
  * Convert in-core extents to on-disk form
  *
- * For either the data or attr fork in extent format, we need to endian convert
- * the in-core extent as we place them into the on-disk inode.
- *
  * In the case of the data fork, the in-core and on-disk fork sizes can be
  * different due to delayed allocation extents. We only copy on-disk extents
  * here, so callers must always use the physical fork size to determine the
@@ -768,53 +688,32 @@ xfs_iext_count(struct xfs_ifork *ifp)
  */
 int
 xfs_iextents_copy(
-	xfs_inode_t		*ip,
-	xfs_bmbt_rec_t		*dp,
+	struct xfs_inode	*ip,
+	struct xfs_bmbt_rec	*dp,
 	int			whichfork)
 {
-	int			copied;
-	int			i;
-	xfs_ifork_t		*ifp;
-	int			nrecs;
-	xfs_fsblock_t		start_block;
+	int			state = xfs_bmap_fork_to_state(whichfork);
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_iext_cursor	icur;
+	struct xfs_bmbt_irec	rec;
+	int			copied = 0;
 
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
 	ASSERT(ifp->if_bytes > 0);
 
-	nrecs = xfs_iext_count(ifp);
-	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
-	ASSERT(nrecs > 0);
-
-	/*
-	 * There are some delayed allocation extents in the
-	 * inode, so copy the extents one at a time and skip
-	 * the delayed ones.  There must be at least one
-	 * non-delayed extent.
-	 */
-	copied = 0;
-	for (i = 0; i < nrecs; i++) {
-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-
-		ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep));
-
-		start_block = xfs_bmbt_get_startblock(ep);
-		if (isnullstartblock(start_block)) {
-			/*
-			 * It's a delayed allocation extent, so skip it.
-			 */
+	for_each_xfs_iext(ifp, &icur, &rec) {
+		if (isnullstartblock(rec.br_startblock))
 			continue;
-		}
-
-		/* Translate to on disk format */
-		put_unaligned_be64(ep->l0, &dp->l0);
-		put_unaligned_be64(ep->l1, &dp->l1);
+		ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
+		xfs_bmbt_disk_set_all(dp, &rec);
+		trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
+		copied += sizeof(struct xfs_bmbt_rec);
 		dp++;
-		copied++;
 	}
-	ASSERT(copied != 0);
 
-	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+	ASSERT(copied > 0);
+	ASSERT(copied <= ifp->if_bytes);
+	return copied;
 }
 
 /*
@@ -872,7 +771,6 @@ xfs_iflush_fork(
 		       !(iip->ili_fields & extflag[whichfork]));
 		if ((iip->ili_fields & extflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
-			ASSERT(xfs_iext_get_ext(ifp, 0));
 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
 				whichfork);
@@ -894,16 +792,7 @@ xfs_iflush_fork(
 	case XFS_DINODE_FMT_DEV:
 		if (iip->ili_fields & XFS_ILOG_DEV) {
 			ASSERT(whichfork == XFS_DATA_FORK);
-			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
-		}
-		break;
-
-	case XFS_DINODE_FMT_UUID:
-		if (iip->ili_fields & XFS_ILOG_UUID) {
-			ASSERT(whichfork == XFS_DATA_FORK);
-			memcpy(XFS_DFORK_DPTR(dip),
-			       &ip->i_df.if_u2.if_uuid,
-			       sizeof(uuid_t));
+			xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev));
 		}
 		break;
 
@@ -913,33 +802,6 @@ xfs_iflush_fork(
 	}
 }
 
-/*
- * Return a pointer to the extent record at file index idx.
- */
-xfs_bmbt_rec_host_t *
-xfs_iext_get_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx)		/* index of target extent */
-{
-	ASSERT(idx >= 0);
-	ASSERT(idx < xfs_iext_count(ifp));
-
-	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
-		return ifp->if_u1.if_ext_irec->er_extbuf;
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_ext_irec_t	*erp;		/* irec pointer */
-		int		erp_idx = 0;	/* irec index */
-		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
-
-		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
-		return &erp->er_extbuf[page_idx];
-	} else if (ifp->if_bytes) {
-		return &ifp->if_u1.if_extents[idx];
-	} else {
-		return NULL;
-	}
-}
-
 /* Convert bmap state flags to an inode fork. */
 struct xfs_ifork *
 xfs_iext_state_to_fork(
@@ -953,1011 +815,6 @@ xfs_iext_state_to_fork(
 	return &ip->i_df;
 }
 
-/*
- * Insert new item(s) into the extent records for incore inode
- * fork 'ifp'.  'count' new items are inserted at index 'idx'.
- */
-void
-xfs_iext_insert(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting index of new items */
-	xfs_extnum_t	count,		/* number of inserted items */
-	xfs_bmbt_irec_t	*new,		/* items to insert */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = xfs_iext_state_to_fork(ip, state);
-	xfs_extnum_t	i;		/* extent record index */
-
-	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
-
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	xfs_iext_add(ifp, idx, count);
-	for (i = idx; i < idx + count; i++, new++)
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be increased. The ext_diff parameter stores the
- * number of new extents being added and the idx parameter contains
- * the extent index where the new extents will be added. If the new
- * extents are being appended, then we just need to (re)allocate and
- * initialize the space. Otherwise, if the new extents are being
- * inserted into the middle of the existing entries, a bit more work
- * is required to make room for the new extents to be inserted. The
- * caller is responsible for filling in the new extent entries upon
- * return.
- */
-void
-xfs_iext_add(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin adding exts */
-	int		ext_diff)	/* number of extents to add */
-{
-	int		byte_diff;	/* new bytes being added */
-	int		new_size;	/* size of extents after adding */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	nextents = xfs_iext_count(ifp);
-	ASSERT((idx >= 0) && (idx <= nextents));
-	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
-	new_size = ifp->if_bytes + byte_diff;
-	/*
-	 * If the new number of extents (nextents + ext_diff)
-	 * fits inside the inode, then continue to use the inline
-	 * extent buffer.
-	 */
-	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
-		if (idx < nextents) {
-			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
-				&ifp->if_u2.if_inline_ext[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
-		}
-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-		ifp->if_real_bytes = 0;
-	}
-	/*
-	 * Otherwise use a linear (direct) extent list.
-	 * If the extents are currently inside the inode,
-	 * xfs_iext_realloc_direct will switch us from
-	 * inline to direct extent allocation mode.
-	 */
-	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, new_size);
-		if (idx < nextents) {
-			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
-				&ifp->if_u1.if_extents[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
-		}
-	}
-	/* Indirection array */
-	else {
-		xfs_ext_irec_t	*erp;
-		int		erp_idx = 0;
-		int		page_idx = idx;
-
-		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
-		if (ifp->if_flags & XFS_IFEXTIREC) {
-			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
-		} else {
-			xfs_iext_irec_init(ifp);
-			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-			erp = ifp->if_u1.if_ext_irec;
-		}
-		/* Extents fit in target extent page */
-		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
-			if (page_idx < erp->er_extcount) {
-				memmove(&erp->er_extbuf[page_idx + ext_diff],
-					&erp->er_extbuf[page_idx],
-					(erp->er_extcount - page_idx) *
-					sizeof(xfs_bmbt_rec_t));
-				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
-			}
-			erp->er_extcount += ext_diff;
-			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		}
-		/* Insert a new extent page */
-		else if (erp) {
-			xfs_iext_add_indirect_multi(ifp,
-				erp_idx, page_idx, ext_diff);
-		}
-		/*
-		 * If extent(s) are being appended to the last page in
-		 * the indirection array and the new extent(s) don't fit
-		 * in the page, then erp is NULL and erp_idx is set to
-		 * the next index needed in the indirection array.
-		 */
-		else {
-			uint	count = ext_diff;
-
-			while (count) {
-				erp = xfs_iext_irec_new(ifp, erp_idx);
-				erp->er_extcount = min(count, XFS_LINEAR_EXTS);
-				count -= erp->er_extcount;
-				if (count)
-					erp_idx++;
-			}
-		}
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being added to the indirection
- * array and the new extents do not fit in the target extent list. The
- * erp_idx parameter contains the irec index for the target extent list
- * in the indirection array, and the idx parameter contains the extent
- * index within the list. The number of extents being added is stored
- * in the count parameter.
- *
- *    |-------|   |-------|
- *    |       |   |       |    idx - number of extents before idx
- *    |  idx  |   | count |
- *    |       |   |       |    count - number of extents being inserted at idx
- *    |-------|   |-------|
- *    | count |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_add_indirect_multi(
-	xfs_ifork_t	*ifp,			/* inode fork pointer */
-	int		erp_idx,		/* target extent irec index */
-	xfs_extnum_t	idx,			/* index within target list */
-	int		count)			/* new extents being added */
-{
-	int		byte_diff;		/* new bytes being added */
-	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
-	xfs_extnum_t	ext_diff;		/* number of extents to add */
-	xfs_extnum_t	ext_cnt;		/* new extents still needed */
-	xfs_extnum_t	nex2;			/* extents after idx + count */
-	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
-	int		nlists;			/* number of irec's (lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	nex2 = erp->er_extcount - idx;
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/*
-	 * Save second part of target extent list
-	 * (all extents past */
-	if (nex2) {
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
-		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
-		erp->er_extcount -= nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
-		memset(&erp->er_extbuf[idx], 0, byte_diff);
-	}
-
-	/*
-	 * Add the new extents to the end of the target
-	 * list, then allocate new irec record(s) and
-	 * extent buffer(s) as needed to store the rest
-	 * of the new extents.
-	 */
-	ext_cnt = count;
-	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
-	if (ext_diff) {
-		erp->er_extcount += ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-	while (ext_cnt) {
-		erp_idx++;
-		erp = xfs_iext_irec_new(ifp, erp_idx);
-		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
-		erp->er_extcount = ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-
-	/* Add nex2 extents back to indirection array */
-	if (nex2) {
-		xfs_extnum_t	ext_avail;
-		int		i;
-
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-		i = 0;
-		/*
-		 * If nex2 extents fit in the current page, append
-		 * nex2_ep after the new extents.
-		 */
-		if (nex2 <= ext_avail) {
-			i = erp->er_extcount;
-		}
-		/*
-		 * Otherwise, check if space is available in the
-		 * next page.
-		 */
-		else if ((erp_idx < nlists - 1) &&
-			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
-			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
-			erp_idx++;
-			erp++;
-			/* Create a hole for nex2 extents */
-			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
-				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
-		}
-		/*
-		 * Final choice, create a new extent page for
-		 * nex2 extents.
-		 */
-		else {
-			erp_idx++;
-			erp = xfs_iext_irec_new(ifp, erp_idx);
-		}
-		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-		kmem_free(nex2_ep);
-		erp->er_extcount += nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
-	}
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be decreased. The ext_diff parameter stores the
- * number of extents to be removed and the idx parameter contains
- * the extent index where the extents will be removed from.
- *
- * If the amount of space needed has decreased below the linear
- * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
- * extent array.  Otherwise, use kmem_realloc() to adjust the
- * size to what is needed.
- */
-void
-xfs_iext_remove(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff,	/* number of extents to remove */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = xfs_iext_state_to_fork(ip, state);
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
-
-	ASSERT(ext_diff > 0);
-	nextents = xfs_iext_count(ifp);
-	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_iext_remove_indirect(ifp, idx, ext_diff);
-	} else if (ifp->if_real_bytes) {
-		xfs_iext_remove_direct(ifp, idx, ext_diff);
-	} else {
-		xfs_iext_remove_inline(ifp, idx, ext_diff);
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This removes ext_diff extents from the inline buffer, beginning
- * at extent index idx.
- */
-void
-xfs_iext_remove_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	int		nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	ASSERT(idx < XFS_INLINE_EXTS);
-	nextents = xfs_iext_count(ifp);
-	ASSERT(((nextents - ext_diff) > 0) &&
-		(nextents - ext_diff) < XFS_INLINE_EXTS);
-
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u2.if_inline_ext[idx],
-			&ifp->if_u2.if_inline_ext[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			 sizeof(xfs_bmbt_rec_t));
-		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
-			0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	} else {
-		memset(&ifp->if_u2.if_inline_ext[idx], 0,
-			ext_diff * sizeof(xfs_bmbt_rec_t));
-	}
-}
-
-/*
- * This removes ext_diff extents from a linear (direct) extent list,
- * beginning at extent index idx. If the extents are being removed
- * from the end of the list (ie. truncate) then we just need to re-
- * allocate the list to remove the extra space. Otherwise, if the
- * extents are being removed from the middle of the existing extent
- * entries, then we first need to move the extent records beginning
- * at idx + ext_diff up in the list to overwrite the records being
- * removed, then remove the extra space via kmem_realloc.
- */
-void
-xfs_iext_remove_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	new_size = ifp->if_bytes -
-		(ext_diff * sizeof(xfs_bmbt_rec_t));
-	nextents = xfs_iext_count(ifp);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-		return;
-	}
-	/* Move extents up in the list (if needed) */
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u1.if_extents[idx],
-			&ifp->if_u1.if_extents[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			 sizeof(xfs_bmbt_rec_t));
-	}
-	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
-		0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	/*
-	 * Reallocate the direct extent list. If the extents
-	 * will fit inside the inode then xfs_iext_realloc_direct
-	 * will switch from direct to inline extent allocation
-	 * mode for us.
-	 */
-	xfs_iext_realloc_direct(ifp, new_size);
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being removed from the
- * indirection array and the extents being removed span multiple extent
- * buffers. The idx parameter contains the file extent index where we
- * want to begin removing extents, and the count parameter contains
- * how many extents need to be removed.
- *
- *    |-------|   |-------|
- *    | nex1  |   |       |    nex1 - number of extents before idx
- *    |-------|   | count |
- *    |       |   |       |    count - number of extents being removed at idx
- *    | count |   |-------|
- *    |       |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_remove_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing extents */
-	int		count)		/* number of extents to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		erp_idx = 0;	/* indirection array index */
-	xfs_extnum_t	ext_cnt;	/* extents left to remove */
-	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
-	xfs_extnum_t	nex1;		/* number of extents before idx */
-	xfs_extnum_t	nex2;		/* extents after idx + count */
-	int		page_idx = idx;	/* index in target extent list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
-	ASSERT(erp != NULL);
-	nex1 = page_idx;
-	ext_cnt = count;
-	while (ext_cnt) {
-		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
-		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
-		/*
-		 * Check for deletion of entire list;
-		 * xfs_iext_irec_remove() updates extent offsets.
-		 */
-		if (ext_diff == erp->er_extcount) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-			ext_cnt -= ext_diff;
-			nex1 = 0;
-			if (ext_cnt) {
-				ASSERT(erp_idx < ifp->if_real_bytes /
-					XFS_IEXT_BUFSZ);
-				erp = &ifp->if_u1.if_ext_irec[erp_idx];
-				nex1 = 0;
-				continue;
-			} else {
-				break;
-			}
-		}
-		/* Move extents up (if needed) */
-		if (nex2) {
-			memmove(&erp->er_extbuf[nex1],
-				&erp->er_extbuf[nex1 + ext_diff],
-				nex2 * sizeof(xfs_bmbt_rec_t));
-		}
-		/* Zero out rest of page */
-		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
-			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
-		/* Update remaining counters */
-		erp->er_extcount -= ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
-		ext_cnt -= ext_diff;
-		nex1 = 0;
-		erp_idx++;
-		erp++;
-	}
-	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
-	xfs_iext_irec_compact(ifp);
-}
-
-/*
- * Create, destroy, or resize a linear (direct) block of extents.
- */
-void
-xfs_iext_realloc_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new size of extents after adding */
-{
-	int		rnew_size;	/* real new size of extents */
-
-	rnew_size = new_size;
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
-		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
-		 (new_size != ifp->if_real_bytes)));
-
-	/* Free extent records */
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	}
-	/* Resize direct extent list and zero any new bytes */
-	else if (ifp->if_real_bytes) {
-		/* Check if extents will fit inside the inode */
-		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
-			xfs_iext_direct_to_inline(ifp, new_size /
-				(uint)sizeof(xfs_bmbt_rec_t));
-			ifp->if_bytes = new_size;
-			return;
-		}
-		if (!is_power_of_2(new_size)){
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		if (rnew_size != ifp->if_real_bytes) {
-			ifp->if_u1.if_extents =
-				kmem_realloc(ifp->if_u1.if_extents,
-						rnew_size, KM_NOFS);
-		}
-		if (rnew_size > ifp->if_real_bytes) {
-			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
-				(uint)sizeof(xfs_bmbt_rec_t)], 0,
-				rnew_size - ifp->if_real_bytes);
-		}
-	}
-	/* Switch from the inline extent buffer to a direct extent list */
-	else {
-		if (!is_power_of_2(new_size)) {
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		xfs_iext_inline_to_direct(ifp, rnew_size);
-	}
-	ifp->if_real_bytes = rnew_size;
-	ifp->if_bytes = new_size;
-}
-
-/*
- * Switch from linear (direct) extent records to inline buffer.
- */
-void
-xfs_iext_direct_to_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	nextents)	/* number of extents in file */
-{
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ASSERT(nextents <= XFS_INLINE_EXTS);
-	/*
-	 * The inline buffer was zeroed when we switched
-	 * from inline to direct extent allocation mode,
-	 * so we don't need to clear it here.
-	 */
-	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
-		nextents * sizeof(xfs_bmbt_rec_t));
-	kmem_free(ifp->if_u1.if_extents);
-	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	ifp->if_real_bytes = 0;
-}
-
-/*
- * Switch from inline buffer to linear (direct) extent records.
- * new_size should already be rounded up to the next power of 2
- * by the caller (when appropriate), so use new_size as it is.
- * However, since new_size may be rounded up, we can't update
- * if_bytes here. It is the caller's responsibility to update
- * if_bytes upon return.
- */
-void
-xfs_iext_inline_to_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new size of extent list, in bytes */
-{
-	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
-	memset(ifp->if_u1.if_extents, 0, new_size);
-	if (ifp->if_bytes) {
-		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
-			ifp->if_bytes);
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_real_bytes = new_size;
-}
-
-/*
- * Resize an extent indirection array to new_size bytes.
- */
-STATIC void
-xfs_iext_realloc_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new indirection array size */
-{
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	ASSERT(ifp->if_real_bytes);
-	ASSERT((new_size >= 0) &&
-	       (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
-			     sizeof(xfs_ext_irec_t))));
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else {
-		ifp->if_u1.if_ext_irec =
-			kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
-	}
-}
-
-/*
- * Switch from indirection array to linear (direct) extent allocations.
- */
-STATIC void
-xfs_iext_indirect_to_direct(
-	 xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		size;		/* size of file extents */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nextents = xfs_iext_count(ifp);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-	size = nextents * sizeof(xfs_bmbt_rec_t);
-
-	xfs_iext_irec_compact_pages(ifp);
-	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
-
-	ep = ifp->if_u1.if_ext_irec->er_extbuf;
-	kmem_free(ifp->if_u1.if_ext_irec);
-	ifp->if_flags &= ~XFS_IFEXTIREC;
-	ifp->if_u1.if_extents = ep;
-	ifp->if_bytes = size;
-	if (nextents < XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, size);
-	}
-}
-
-/*
- * Remove all records from the indirection array.
- */
-STATIC void
-xfs_iext_irec_remove_all(
-	struct xfs_ifork *ifp)
-{
-	int		nlists;
-	int		i;
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	for (i = 0; i < nlists; i++)
-		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
-	kmem_free(ifp->if_u1.if_ext_irec);
-	ifp->if_flags &= ~XFS_IFEXTIREC;
-}
-
-/*
- * Free incore file extents.
- */
-void
-xfs_iext_destroy(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_iext_irec_remove_all(ifp);
-	} else if (ifp->if_real_bytes) {
-		kmem_free(ifp->if_u1.if_extents);
-	} else if (ifp->if_bytes) {
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_u1.if_extents = NULL;
-	ifp->if_real_bytes = 0;
-	ifp->if_bytes = 0;
-}
-
-/*
- * Return a pointer to the extent record for file system block bno.
- */
-xfs_bmbt_rec_host_t *			/* pointer to found extent record */
-xfs_iext_bno_to_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	xfs_extnum_t	*idxp)		/* index of target extent */
-{
-	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
-	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
-	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	int		high;		/* upper boundary in search */
-	xfs_extnum_t	idx = 0;	/* index of target extent */
-	int		low;		/* lower boundary in search */
-	xfs_extnum_t	nextents;	/* number of file extents */
-	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
-
-	nextents = xfs_iext_count(ifp);
-	if (nextents == 0) {
-		*idxp = 0;
-		return NULL;
-	}
-	low = 0;
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		/* Find target extent list */
-		int	erp_idx = 0;
-		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
-		base = erp->er_extbuf;
-		high = erp->er_extcount - 1;
-	} else {
-		base = ifp->if_u1.if_extents;
-		high = nextents - 1;
-	}
-	/* Binary search extent records */
-	while (low <= high) {
-		idx = (low + high) >> 1;
-		ep = base + idx;
-		startoff = xfs_bmbt_get_startoff(ep);
-		blockcount = xfs_bmbt_get_blockcount(ep);
-		if (bno < startoff) {
-			high = idx - 1;
-		} else if (bno >= startoff + blockcount) {
-			low = idx + 1;
-		} else {
-			/* Convert back to file-based extent index */
-			if (ifp->if_flags & XFS_IFEXTIREC) {
-				idx += erp->er_extoff;
-			}
-			*idxp = idx;
-			return ep;
-		}
-	}
-	/* Convert back to file-based extent index */
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		idx += erp->er_extoff;
-	}
-	if (bno >= startoff + blockcount) {
-		if (++idx == nextents) {
-			ep = NULL;
-		} else {
-			ep = xfs_iext_get_ext(ifp, idx);
-		}
-	}
-	*idxp = idx;
-	return ep;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record for filesystem block bno. Store the index of the
- * target irec in *erp_idxp.
- */
-xfs_ext_irec_t *			/* pointer to found extent record */
-xfs_iext_bno_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	int		*erp_idxp)	/* irec index of target ext list */
-{
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of extent irec's (lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
-		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
-			high = erp_idx - 1;
-		} else if (erp_next && bno >=
-			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
-			low = erp_idx + 1;
-		} else {
-			break;
-		}
-	}
-	*erp_idxp = erp_idx;
-	return erp;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record at file extent index *idxp. Store the index of the
- * target irec in *erp_idxp and store the page index of the target
- * extent record in *idxp.
- */
-xfs_ext_irec_t *
-xfs_iext_idx_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
-	int		*erp_idxp,	/* pointer to target irec */
-	int		realloc)	/* new bytes were just added */
-{
-	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
-	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	ASSERT(page_idx >= 0);
-	ASSERT(page_idx <= xfs_iext_count(ifp));
-	ASSERT(page_idx < xfs_iext_count(ifp) || realloc);
-
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-
-	/* Binary search extent irec's */
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		prev = erp_idx > 0 ? erp - 1 : NULL;
-		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
-		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
-			high = erp_idx - 1;
-		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
-			   (page_idx == erp->er_extoff + erp->er_extcount &&
-			    !realloc)) {
-			low = erp_idx + 1;
-		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
-			   erp->er_extcount == XFS_LINEAR_EXTS) {
-			ASSERT(realloc);
-			page_idx = 0;
-			erp_idx++;
-			erp = erp_idx < nlists ? erp + 1 : NULL;
-			break;
-		} else {
-			page_idx -= erp->er_extoff;
-			break;
-		}
-	}
-	*idxp = page_idx;
-	*erp_idxp = erp_idx;
-	return erp;
-}
-
-/*
- * Allocate and initialize an indirection array once the space needed
- * for incore extents increases above XFS_IEXT_BUFSZ.
- */
-void
-xfs_iext_irec_init(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	nextents = xfs_iext_count(ifp);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-
-	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
-
-	if (nextents == 0) {
-		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	} else if (!ifp->if_real_bytes) {
-		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
-	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
-		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
-	}
-	erp->er_extbuf = ifp->if_u1.if_extents;
-	erp->er_extcount = nextents;
-	erp->er_extoff = 0;
-
-	ifp->if_flags |= XFS_IFEXTIREC;
-	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
-	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
-	ifp->if_u1.if_ext_irec = erp;
-
-	return;
-}
-
-/*
- * Allocate and initialize a new entry in the indirection array.
- */
-xfs_ext_irec_t *
-xfs_iext_irec_new(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* index for new irec */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/* Resize indirection array */
-	xfs_iext_realloc_indirect(ifp, ++nlists *
-				  sizeof(xfs_ext_irec_t));
-	/*
-	 * Move records down in the array so the
-	 * new page can use erp_idx.
-	 */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = nlists - 1; i > erp_idx; i--) {
-		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
-	}
-	ASSERT(i == erp_idx);
-
-	/* Initialize new extent record */
-	erp = ifp->if_u1.if_ext_irec;
-	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
-	erp[erp_idx].er_extcount = 0;
-	erp[erp_idx].er_extoff = erp_idx > 0 ?
-		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
-	return (&erp[erp_idx]);
-}
-
-/*
- * Remove a record from the indirection array.
- */
-void
-xfs_iext_irec_remove(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* irec index to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	if (erp->er_extbuf) {
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-			-erp->er_extcount);
-		kmem_free(erp->er_extbuf);
-	}
-	/* Compact extent records */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = erp_idx; i < nlists - 1; i++) {
-		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
-	}
-	/*
-	 * Manually free the last extent record from the indirection
-	 * array.  A call to xfs_iext_realloc_indirect() with a size
-	 * of zero would result in a call to xfs_iext_destroy() which
-	 * would in turn call this function again, creating a nasty
-	 * infinite loop.
-	 */
-	if (--nlists) {
-		xfs_iext_realloc_indirect(ifp,
-			nlists * sizeof(xfs_ext_irec_t));
-	} else {
-		kmem_free(ifp->if_u1.if_ext_irec);
-	}
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-}
-
-/*
- * This is called to clean up large amounts of unused memory allocated
- * by the indirection array.  Before compacting anything though, verify
- * that the indirection array is still needed and switch back to the
- * linear extent list (or even the inline buffer) if possible.  The
- * compaction policy is as follows:
- *
- *    Full Compaction: Extents fit into a single page (or inline buffer)
- * Partial Compaction: Extents occupy less than 50% of allocated space
- *      No Compaction: Extents occupy at least 50% of allocated space
- */
-void
-xfs_iext_irec_compact(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	nextents = xfs_iext_count(ifp);
-
-	if (nextents == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (nextents <= XFS_INLINE_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-		xfs_iext_direct_to_inline(ifp, nextents);
-	} else if (nextents <= XFS_LINEAR_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
-		xfs_iext_irec_compact_pages(ifp);
-	}
-}
-
-/*
- * Combine extents from neighboring extent pages.
- */
-void
-xfs_iext_irec_compact_pages(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
-	int		erp_idx = 0;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	while (erp_idx < nlists - 1) {
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp + 1;
-		if (erp_next->er_extcount <=
-		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
-			memcpy(&erp->er_extbuf[erp->er_extcount],
-				erp_next->er_extbuf, erp_next->er_extcount *
-				sizeof(xfs_bmbt_rec_t));
-			erp->er_extcount += erp_next->er_extcount;
-			/*
-			 * Free page before removing extent record
-			 * so er_extoffs don't get modified in
-			 * xfs_iext_irec_remove.
-			 */
-			kmem_free(erp_next->er_extbuf);
-			erp_next->er_extbuf = NULL;
-			xfs_iext_irec_remove(ifp, erp_idx + 1);
-			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		} else {
-			erp_idx++;
-		}
-	}
-}
-
-/*
- * This is called to update the er_extoff field in the indirection
- * array when extents have been added or removed from one of the
- * extent lists. erp_idx contains the irec index to begin updating
- * at and ext_diff contains the number of extents that were added
- * or removed.
- */
-void
-xfs_iext_irec_update_extoffs(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx,	/* irec index to update */
-	int		ext_diff)	/* number of new extents */
-{
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	for (i = erp_idx; i < nlists; i++) {
-		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
-	}
-}
-
 /*
  * Initialize an inode's copy-on-write fork.
  */
@@ -1974,61 +831,3 @@ xfs_ifork_init_cow(
 	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
 	ip->i_cnextents = 0;
 }
-
-/*
- * Lookup the extent covering bno.
- *
- * If there is an extent covering bno return the extent index, and store the
- * expanded extent structure in *gotp, and the extent index in *idx.
- * If there is no extent covering bno, but there is an extent after it (e.g.
- * it lies in a hole) return that extent in *gotp and its index in *idx
- * instead.
- * If bno is beyond the last extent return false, and return the index after
- * the last valid index in *idxp.
- */
-bool
-xfs_iext_lookup_extent(
-	struct xfs_inode	*ip,
-	struct xfs_ifork	*ifp,
-	xfs_fileoff_t		bno,
-	xfs_extnum_t		*idxp,
-	struct xfs_bmbt_irec	*gotp)
-{
-	struct xfs_bmbt_rec_host *ep;
-
-	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
-
-	ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
-	if (!ep)
-		return false;
-	xfs_bmbt_get_all(ep, gotp);
-	return true;
-}
-
-/*
- * Return true if there is an extent at index idx, and return the expanded
- * extent structure at idx in that case.  Else return false.
- */
-bool
-xfs_iext_get_extent(
-	struct xfs_ifork	*ifp,
-	xfs_extnum_t		idx,
-	struct xfs_bmbt_irec	*gotp)
-{
-	if (idx < 0 || idx >= xfs_iext_count(ifp))
-		return false;
-	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
-	return true;
-}
-
-void
-xfs_iext_update_extent(
-	struct xfs_ifork	*ifp,
-	xfs_extnum_t		idx,
-	struct xfs_bmbt_irec	*gotp)
-{
-	ASSERT(idx >= 0);
-	ASSERT(idx < xfs_iext_count(ifp));
-
-	xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp);
-}

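The extent lookup helpers removed above carried a bare extent index (idxp) between calls; their replacements, declared in xfs_inode_fork.h below, thread a struct xfs_iext_cursor instead. A minimal sketch of the new calling convention, using only the prototypes added by this patch (example_walk_from itself is hypothetical, not part of the series):

STATIC void
example_walk_from(
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		bno)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;

	/* Find the extent covering bno, or the first one after it. */
	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
		return;				/* past the last extent */
	do {
		/* ... act on got.br_startoff and got.br_blockcount ... */
	} while (xfs_iext_next_extent(ifp, &icur, &got));
}
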
+ 65 - 73
fs/xfs/libxfs/xfs_inode_fork.h

@@ -21,57 +21,20 @@
 struct xfs_inode_log_item;
 struct xfs_dinode;
 
-/*
- * The following xfs_ext_irec_t struct introduces a second (top) level
- * to the in-core extent allocation scheme. These structs are allocated
- * in a contiguous block, creating an indirection array where each entry
- * (irec) contains a pointer to a buffer of in-core extent records which
- * it manages. Each extent buffer is 4k in size, since 4k is the system
- * page size on Linux i386 and systems with larger page sizes don't seem
- * to gain much, if anything, by using their native page size as the
- * extent buffer size. Also, using 4k extent buffers everywhere provides
- * a consistent interface for CXFS across different platforms.
- *
- * There is currently no limit on the number of irec's (extent lists)
- * allowed, so heavily fragmented files may require an indirection array
- * which spans multiple system pages of memory. The number of extents
- * which would require this amount of contiguous memory is very large
- * and should not cause problems in the foreseeable future. However,
- * if the memory needed for the contiguous array ever becomes a problem,
- * it is possible that a third level of indirection may be required.
- */
-typedef struct xfs_ext_irec {
-	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
-	xfs_extnum_t	er_extoff;	/* extent offset in file */
-	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
-} xfs_ext_irec_t;
-
 /*
  * File incore extent information, present for each of data & attr forks.
  */
-#define	XFS_IEXT_BUFSZ		4096
-#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
-#define	XFS_INLINE_EXTS		2
-#define	XFS_INLINE_DATA		32
 typedef struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
 	int			if_real_bytes;	/* bytes allocated in if_u1 */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
+	int			if_height;	/* height of the extent tree */
 	union {
-		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
-		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
+		void		*if_root;	/* extent tree root */
 		char		*if_data;	/* inline file data */
 	} if_u1;
-	union {
-		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
-						/* very small file extents */
-		char		if_inline_data[XFS_INLINE_DATA];
-						/* very small file data */
-		xfs_dev_t	if_rdev;	/* dev number if special */
-		uuid_t		if_uuid;	/* mount point value */
-	} if_u2;
 } xfs_ifork_t;
 
 /*
@@ -80,7 +43,6 @@ typedef struct xfs_ifork {
 #define	XFS_IFINLINE	0x01	/* Inline data is read in */
 #define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
 #define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
-#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
 
 /*
  * Fork handling.
@@ -150,45 +112,75 @@ int		xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
 				  int);
 void		xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
 
-struct xfs_bmbt_rec_host *
-		xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
-xfs_extnum_t	xfs_iext_count(struct xfs_ifork *);
-void		xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
-				struct xfs_bmbt_irec *, int);
-void		xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
-void		xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
-					    xfs_extnum_t, int);
-void		xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
-void		xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
-void		xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
-void		xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
-void		xfs_iext_realloc_direct(struct xfs_ifork *, int);
-void		xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
-void		xfs_iext_inline_to_direct(struct xfs_ifork *, int);
+xfs_extnum_t	xfs_iext_count(struct xfs_ifork *ifp);
+void		xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
+			struct xfs_bmbt_irec *, int);
+void		xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *,
+			int);
 void		xfs_iext_destroy(struct xfs_ifork *);
-struct xfs_bmbt_rec_host *
-		xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
-struct xfs_ext_irec *
-		xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
-struct xfs_ext_irec *
-		xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
-				     int);
-void		xfs_iext_irec_init(struct xfs_ifork *);
-struct xfs_ext_irec *
-		xfs_iext_irec_new(struct xfs_ifork *, int);
-void		xfs_iext_irec_remove(struct xfs_ifork *, int);
-void		xfs_iext_irec_compact(struct xfs_ifork *);
-void		xfs_iext_irec_compact_pages(struct xfs_ifork *);
-void		xfs_iext_irec_compact_full(struct xfs_ifork *);
-void		xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
 
 bool		xfs_iext_lookup_extent(struct xfs_inode *ip,
 			struct xfs_ifork *ifp, xfs_fileoff_t bno,
-			xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
-bool		xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+			struct xfs_iext_cursor *cur,
 			struct xfs_bmbt_irec *gotp);
-void		xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+bool		xfs_iext_lookup_extent_before(struct xfs_inode *ip,
+			struct xfs_ifork *ifp, xfs_fileoff_t *end,
+			struct xfs_iext_cursor *cur,
 			struct xfs_bmbt_irec *gotp);
+bool		xfs_iext_get_extent(struct xfs_ifork *ifp,
+			struct xfs_iext_cursor *cur,
+			struct xfs_bmbt_irec *gotp);
+void		xfs_iext_update_extent(struct xfs_inode *ip, int state,
+			struct xfs_iext_cursor *cur,
+			struct xfs_bmbt_irec *gotp);
+
+void		xfs_iext_first(struct xfs_ifork *, struct xfs_iext_cursor *);
+void		xfs_iext_last(struct xfs_ifork *, struct xfs_iext_cursor *);
+void		xfs_iext_next(struct xfs_ifork *, struct xfs_iext_cursor *);
+void		xfs_iext_prev(struct xfs_ifork *, struct xfs_iext_cursor *);
+
+static inline bool xfs_iext_next_extent(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+{
+	xfs_iext_next(ifp, cur);
+	return xfs_iext_get_extent(ifp, cur, gotp);
+}
+
+static inline bool xfs_iext_prev_extent(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+{
+	xfs_iext_prev(ifp, cur);
+	return xfs_iext_get_extent(ifp, cur, gotp);
+}
+
+/*
+ * Return the extent after cur in gotp without updating the cursor.
+ */
+static inline bool xfs_iext_peek_next_extent(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+{
+	struct xfs_iext_cursor ncur = *cur;
+
+	xfs_iext_next(ifp, &ncur);
+	return xfs_iext_get_extent(ifp, &ncur, gotp);
+}
+
+/*
+ * Return the extent before cur in gotp without updating the cursor.
+ */
+static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp,
+		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
+{
+	struct xfs_iext_cursor ncur = *cur;
+
+	xfs_iext_prev(ifp, &ncur);
+	return xfs_iext_get_extent(ifp, &ncur, gotp);
+}
+
+#define for_each_xfs_iext(ifp, ext, got)		\
+	for (xfs_iext_first((ifp), (ext));		\
+	     xfs_iext_get_extent((ifp), (ext), (got));	\
+	     xfs_iext_next((ifp), (ext)))
 
 extern struct kmem_zone	*xfs_ifork_zone;
 

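for_each_xfs_iext() replaces the old open-coded loops over extent indices. A sketch of a walker built on it (example_count_blocks is illustrative only):

STATIC xfs_filblks_t
example_count_blocks(
	struct xfs_ifork	*ifp)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_filblks_t		blocks = 0;

	/* Visit every extent in the fork, first to last. */
	for_each_xfs_iext(ifp, &icur, &got)
		blocks += got.br_blockcount;
	return blocks;
}
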
+ 12 - 12
fs/xfs/libxfs/xfs_log_format.h

@@ -264,7 +264,7 @@ typedef struct xfs_trans_header {
  * (if any) is indicated in the ilf_dsize field.  Changes to this structure
  * must be added on to the end.
  */
-typedef struct xfs_inode_log_format {
+struct xfs_inode_log_format {
 	uint16_t		ilf_type;	/* inode log item type */
 	uint16_t		ilf_size;	/* size of this item */
 	uint32_t		ilf_fields;	/* flags for fields logged */
@@ -274,12 +274,12 @@ typedef struct xfs_inode_log_format {
 	uint64_t		ilf_ino;	/* inode number */
 	union {
 		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
+		u8		__pad[16];	/* unused */
 	} ilf_u;
 	int64_t			ilf_blkno;	/* blkno of inode buffer */
 	int32_t			ilf_len;	/* len of inode buffer */
 	int32_t			ilf_boffset;	/* off of inode in buffer */
-} xfs_inode_log_format_t;
+};
 
 /*
  * Old 32 bit systems will log in this format without the 64 bit
@@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 {
 	uint64_t		ilf_ino;	/* inode number */
 	union {
 		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
+		u8		__pad[16];	/* unused */
 	} ilf_u;
 	int64_t			ilf_blkno;	/* blkno of inode buffer */
 	int32_t			ilf_len;	/* len of inode buffer */
@@ -311,7 +311,7 @@ struct xfs_inode_log_format_32 {
 #define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
 #define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
 #define	XFS_ILOG_DEV	0x010	/* log the dev field */
-#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
+#define	XFS_ILOG_UUID	0x020	/* added long ago, but never used */
 #define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
 #define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
 #define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
@@ -329,9 +329,9 @@ struct xfs_inode_log_format_32 {
 
 #define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
 				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
-				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
-				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
-				 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
+				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+				 XFS_ILOG_ABROOT | XFS_ILOG_DOWNER | \
+				 XFS_ILOG_AOWNER)
 
 #define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
 				 XFS_ILOG_DBROOT)
@@ -341,10 +341,10 @@ struct xfs_inode_log_format_32 {
 
 #define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
 				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
-				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
-				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
-				 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
+				 XFS_ILOG_DEV | XFS_ILOG_ADATA | \
+				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
+				 XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \
+				 XFS_ILOG_AOWNER)
 
 static inline int xfs_ilog_fbroot(int w)
 {

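Swapping ilfu_uuid for a 16-byte pad is safe for log recovery because uuid_t is itself 16 bytes, so neither the size nor any field offset of the logged item changes. A hedged compile-time restatement (example_check_ilf_sizes is hypothetical; 56 and 52 bytes are the sizes the on-disk format checks have historically asserted for these two structures):

STATIC void
example_check_ilf_sizes(void)
{
	/* The pad is exactly as large as the uuid it replaces... */
	BUILD_BUG_ON(sizeof(uuid_t) != 16);
	/* ...so both log item layouts keep their on-disk sizes. */
	BUILD_BUG_ON(sizeof(struct xfs_inode_log_format) != 56);
	BUILD_BUG_ON(sizeof(struct xfs_inode_log_format_32) != 52);
}
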
+ 1 - 0
fs/xfs/libxfs/xfs_refcount.c

@@ -30,6 +30,7 @@
 #include "xfs_bmap.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_alloc.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"

+ 1 - 0
fs/xfs/libxfs/xfs_rmap.c

@@ -34,6 +34,7 @@
 #include "xfs_rmap_btree.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
 #include "xfs_bmap.h"

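These one-line hunks follow from moving the error injection tag definitions into a shared libxfs header (xfs_errortag.h), so files that use XFS_ERRTAG_* constants now include it explicitly. A sketch of the sort of injection point this keeps working (example_inject_point is hypothetical, and XFS_TEST_ERROR is assumed to take its usual expression/mount/tag arguments):

STATIC int
example_inject_point(
	struct xfs_btree_cur	*cur)
{
	/* Take the error path whenever the injection tag is armed. */
	if (XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return -EIO;
	return 0;
}
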
+ 12 - 1
fs/xfs/libxfs/xfs_rtbitmap.c

@@ -672,7 +672,6 @@ xfs_rtmodify_range(
 		/*
 		 * Compute a mask of relevant bits.
 		 */
-		bit = 0;
 		mask = ((xfs_rtword_t)1 << lastbit) - 1;
 		/*
 		 * Set/clear the active bits.
@@ -1086,3 +1085,15 @@ xfs_rtalloc_query_all(
 
 	return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
 }
+
+/*
+ * Verify that a realtime block number doesn't point off the end
+ * of the realtime device.
+ */
+bool
+xfs_verify_rtbno(
+	struct xfs_mount	*mp,
+	xfs_rtblock_t		rtbno)
+{
+	return rtbno < mp->m_sb.sb_rblocks;
+}

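xfs_verify_rtbno() gives callers a single predicate for realtime block numbers; range-checking a whole extent just applies it to both endpoints. A small sketch (example_rtextent_ok is illustrative):

STATIC bool
example_rtextent_ok(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtbno,
	xfs_filblks_t		len)
{
	/* Reject empty or wrapping extents, then check both ends. */
	return len > 0 && rtbno + len > rtbno &&
	       xfs_verify_rtbno(mp, rtbno) &&
	       xfs_verify_rtbno(mp, rtbno + len - 1);
}
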
+ 22 - 0
fs/xfs/libxfs/xfs_types.h

@@ -47,6 +47,12 @@ typedef uint64_t	xfs_filblks_t;	/* number of blocks in a file */
 typedef int64_t		xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
 typedef int64_t		xfs_sfiloff_t;	/* signed block number in a file */
 
+/*
+ * New verifiers will return the instruction address of the failing check.
+ * NULL means everything is ok.
+ */
+typedef void *		xfs_failaddr_t;
+
 /*
  * Null values for the types.
  */
@@ -136,5 +142,21 @@ typedef uint32_t	xfs_dqid_t;
 #define	XFS_NBWORD	(1 << XFS_NBWORDLOG)
 #define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)
 
+struct xfs_iext_cursor {
+	struct xfs_iext_leaf	*leaf;
+	int			pos;
+};
+
+typedef enum {
+	XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
+} xfs_exntst_t;
+
+typedef struct xfs_bmbt_irec
+{
+	xfs_fileoff_t	br_startoff;	/* starting file offset */
+	xfs_fsblock_t	br_startblock;	/* starting block number */
+	xfs_filblks_t	br_blockcount;	/* number of blocks */
+	xfs_exntst_t	br_state;	/* extent state */
+} xfs_bmbt_irec_t;
 
 #endif	/* __XFS_TYPES_H__ */

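The xfs_failaddr_t convention above works like this: a verifier returns NULL when everything checks out, or an address that pinpoints the failing test for diagnostics. A hypothetical sketch of that shape; example_this_address stands in for a real helper and leans on gcc's label-address extension:

#define example_this_address \
	({ __label__ __here; __here: barrier(); &&__here; })

STATIC xfs_failaddr_t
example_verify_rtbno(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtbno)
{
	if (!xfs_verify_rtbno(mp, rtbno))
		return example_this_address;	/* which check failed */
	return NULL;				/* everything is ok */
}
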
+ 658 - 0
fs/xfs/scrub/agheader.c

@@ -0,0 +1,658 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/*
+ * Set up scrub to check all the static metadata in each AG.
+ * This means the SB, AGF, AGI, and AGFL headers.
+ */
+int
+xfs_scrub_setup_ag_header(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
+	    sc->sm->sm_ino || sc->sm->sm_gen)
+		return -EINVAL;
+	return xfs_scrub_setup_fs(sc, ip);
+}
+
+/* Walk all the blocks in the AGFL. */
+int
+xfs_scrub_walk_agfl(
+	struct xfs_scrub_context	*sc,
+	int				(*fn)(struct xfs_scrub_context *,
+					      xfs_agblock_t bno, void *),
+	void				*priv)
+{
+	struct xfs_agf			*agf;
+	__be32				*agfl_bno;
+	struct xfs_mount		*mp = sc->mp;
+	unsigned int			flfirst;
+	unsigned int			fllast;
+	int				i;
+	int				error;
+
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp);
+	flfirst = be32_to_cpu(agf->agf_flfirst);
+	fllast = be32_to_cpu(agf->agf_fllast);
+
+	/* Nothing to walk in an empty AGFL. */
+	if (agf->agf_flcount == cpu_to_be32(0))
+		return 0;
+
+	/* No wraparound: walk straight from flfirst to fllast. */
+	if (fllast >= flfirst) {
+		for (i = flfirst; i <= fllast; i++) {
+			error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+			if (error)
+				return error;
+			if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+				return error;
+		}
+
+		return 0;
+	}
+
+	/* Wrapped list: walk from flfirst to the end of the array... */
+	for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+		if (error)
+			return error;
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			return error;
+	}
+
+	/* ...then from the start of the array to fllast. */
+	for (i = 0; i <= fllast; i++) {
+		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+		if (error)
+			return error;
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			return error;
+	}
+
+	return 0;
+}
+
+/* Superblock */
+
+/*
+ * Scrub the filesystem superblock.
+ *
+ * Note: We do /not/ attempt to check AG 0's superblock.  Mount is
+ * responsible for validating all the geometry information in sb 0, so
+ * if the filesystem is capable of initiating online scrub, then clearly
+ * sb 0 is ok and we can use its information to check everything else.
+ */
+int
+xfs_scrub_superblock(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp;
+	struct xfs_dsb			*sb;
+	xfs_agnumber_t			agno;
+	uint32_t			v2_ok;
+	__be32				features_mask;
+	int				error;
+	__be16				vernum_mask;
+
+	agno = sc->sm->sm_agno;
+	if (agno == 0)
+		return 0;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+	if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
+		return error;
+
+	sb = XFS_BUF_TO_SBP(bp);
+
+	/*
+	 * Verify the geometries match.  Fields that are permanently
+	 * set by mkfs are checked; fields that can be updated later
+	 * (and are not propagated to backup superblocks) are preen
+	 * checked.
+	 */
+	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Check sb_versionnum bits that are set at mkfs time. */
+	vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS |
+				  XFS_SB_VERSION_NUMBITS |
+				  XFS_SB_VERSION_ALIGNBIT |
+				  XFS_SB_VERSION_DALIGNBIT |
+				  XFS_SB_VERSION_SHAREDBIT |
+				  XFS_SB_VERSION_LOGV2BIT |
+				  XFS_SB_VERSION_SECTORBIT |
+				  XFS_SB_VERSION_EXTFLGBIT |
+				  XFS_SB_VERSION_DIRV2BIT);
+	if ((sb->sb_versionnum & vernum_mask) !=
+	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Check sb_versionnum bits that can be set after mkfs time. */
+	vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT |
+				  XFS_SB_VERSION_NLINKBIT |
+				  XFS_SB_VERSION_QUOTABIT);
+	if ((sb->sb_versionnum & vernum_mask) !=
+	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname)))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_blocklog != mp->m_sb.sb_blocklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_sectlog != mp->m_sb.sb_sectlog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inodelog != mp->m_sb.sb_inodelog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inopblog != mp->m_sb.sb_inopblog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agblklog != mp->m_sb.sb_agblklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rextslog != mp->m_sb.sb_rextslog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct)
+		xfs_scrub_block_set_preen(sc, bp);
+
+	/*
+	 * Skip the summary counters since we track them in memory anyway.
+	 * sb_icount, sb_ifree, sb_fdblocks, sb_frextents
+	 */
+
+	if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	/*
+	 * Skip the quota flags since repair will force quotacheck.
+	 * sb_qflags
+	 */
+
+	if (sb->sb_flags != mp->m_sb.sb_flags)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Do we see any invalid bits in sb_features2? */
+	if (!xfs_sb_version_hasmorebits(&mp->m_sb)) {
+		if (sb->sb_features2 != 0)
+			xfs_scrub_block_set_corrupt(sc, bp);
+	} else {
+		v2_ok = XFS_SB_VERSION2_OKBITS;
+		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
+			v2_ok |= XFS_SB_VERSION2_CRCBIT;
+
+		if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		if (sb->sb_features2 != sb->sb_bad_features2)
+			xfs_scrub_block_set_preen(sc, bp);
+	}
+
+	/* Check sb_features2 flags that are set at mkfs time. */
+	features_mask = cpu_to_be32(XFS_SB_VERSION2_LAZYSBCOUNTBIT |
+				    XFS_SB_VERSION2_PROJID32BIT |
+				    XFS_SB_VERSION2_CRCBIT |
+				    XFS_SB_VERSION2_FTYPE);
+	if ((sb->sb_features2 & features_mask) !=
+	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Check sb_features2 flags that can be set after mkfs time. */
+	features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
+	if ((sb->sb_features2 & features_mask) !=
+	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+		/* all v5 fields must be zero */
+		if (memchr_inv(&sb->sb_features_compat, 0,
+				sizeof(struct xfs_dsb) -
+				offsetof(struct xfs_dsb, sb_features_compat)))
+			xfs_scrub_block_set_corrupt(sc, bp);
+	} else {
+		/* Check compat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
+		if ((sb->sb_features_compat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check ro compat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
+					    XFS_SB_FEAT_RO_COMPAT_FINOBT |
+					    XFS_SB_FEAT_RO_COMPAT_RMAPBT |
+					    XFS_SB_FEAT_RO_COMPAT_REFLINK);
+		if ((sb->sb_features_ro_compat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
+		     features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check incompat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
+					    XFS_SB_FEAT_INCOMPAT_FTYPE |
+					    XFS_SB_FEAT_INCOMPAT_SPINODES |
+					    XFS_SB_FEAT_INCOMPAT_META_UUID);
+		if ((sb->sb_features_incompat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_incompat) &
+		     features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check log incompat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
+		if ((sb->sb_features_log_incompat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
+		     features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Don't care about sb_crc */
+
+		if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
+			xfs_scrub_block_set_preen(sc, bp);
+
+		/* Don't care about sb_lsn */
+	}
+
+	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
+		/* The metadata UUID must be the same for all supers */
+		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+			xfs_scrub_block_set_corrupt(sc, bp);
+	}
+
+	/* Everything else must be zero. */
+	if (memchr_inv(sb + 1, 0,
+			BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	return error;
+}
+
+/* AGF */
+
+/* Scrub the AGF. */
+int
+xfs_scrub_agf(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_agf			*agf;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	xfs_agblock_t			eoag;
+	xfs_agblock_t			agfl_first;
+	xfs_agblock_t			agfl_last;
+	xfs_agblock_t			agfl_count;
+	xfs_agblock_t			fl_count;
+	int				level;
+	int				error = 0;
+
+	agno = sc->sa.agno = sc->sm->sm_agno;
+	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+	if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
+		goto out;
+
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+
+	/* Check the AG length */
+	eoag = be32_to_cpu(agf->agf_length);
+	if (eoag != xfs_ag_block_count(mp, agno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+	/* Check the AGF btree roots and levels */
+	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
+	if (!xfs_verify_agbno(mp, agno, agbno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
+	if (!xfs_verify_agbno(mp, agno, agbno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+		if (!xfs_verify_agbno(mp, agno, agbno))
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+		level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+	}
+
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		agbno = be32_to_cpu(agf->agf_refcount_root);
+		if (!xfs_verify_agbno(mp, agno, agbno))
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+		level = be32_to_cpu(agf->agf_refcount_level);
+		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+	}
+
+	/* Check the AGFL counters */
+	agfl_first = be32_to_cpu(agf->agf_flfirst);
+	agfl_last = be32_to_cpu(agf->agf_fllast);
+	agfl_count = be32_to_cpu(agf->agf_flcount);
+	if (agfl_last > agfl_first)
+		fl_count = agfl_last - agfl_first + 1;
+	else
+		fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+	if (agfl_count != 0 && fl_count != agfl_count)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+
+out:
+	return error;
+}
+
+/* AGFL */
+
+struct xfs_scrub_agfl_info {
+	unsigned int			sz_entries;
+	unsigned int			nr_entries;
+	xfs_agblock_t			*entries;
+};
+
+/* Scrub an AGFL block. */
+STATIC int
+xfs_scrub_agfl_block(
+	struct xfs_scrub_context	*sc,
+	xfs_agblock_t			agbno,
+	void				*priv)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_scrub_agfl_info	*sai = priv;
+	xfs_agnumber_t			agno = sc->sa.agno;
+
+	if (xfs_verify_agbno(mp, agno, agbno) &&
+	    sai->nr_entries < sai->sz_entries)
+		sai->entries[sai->nr_entries++] = agbno;
+	else
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
+
+	return 0;
+}
+
+static int
+xfs_scrub_agblock_cmp(
+	const void		*pa,
+	const void		*pb)
+{
+	const xfs_agblock_t	*a = pa;
+	const xfs_agblock_t	*b = pb;
+
+	return (int)*a - (int)*b;
+}
+
+/* Scrub the AGFL. */
+int
+xfs_scrub_agfl(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_scrub_agfl_info	sai = { 0 };
+	struct xfs_agf			*agf;
+	xfs_agnumber_t			agno;
+	unsigned int			agflcount;
+	unsigned int			i;
+	int				error;
+
+	agno = sc->sa.agno = sc->sm->sm_agno;
+	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+	if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
+		goto out;
+	if (!sc->sa.agf_bp)
+		return -EFSCORRUPTED;
+
+	/* Allocate buffer to ensure uniqueness of AGFL entries. */
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	agflcount = be32_to_cpu(agf->agf_flcount);
+	if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		goto out;
+	}
+	sai.sz_entries = agflcount;
+	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
+	if (!sai.entries) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	/* Check the blocks in the AGFL. */
+	error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
+	if (error)
+		goto out_free;
+
+	if (agflcount != sai.nr_entries) {
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		goto out_free;
+	}
+
+	/* Sort entries, check for duplicates. */
+	sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]),
+			xfs_scrub_agblock_cmp, NULL);
+	for (i = 1; i < sai.nr_entries; i++) {
+		if (sai.entries[i] == sai.entries[i - 1]) {
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			break;
+		}
+	}
+
+out_free:
+	kmem_free(sai.entries);
+out:
+	return error;
+}
+
+/* AGI */
+
+/* Scrub the AGI. */
+int
+xfs_scrub_agi(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_agi			*agi;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	xfs_agblock_t			eoag;
+	xfs_agino_t			agino;
+	xfs_agino_t			first_agino;
+	xfs_agino_t			last_agino;
+	xfs_agino_t			icount;
+	int				i;
+	int				level;
+	int				error = 0;
+
+	agno = sc->sa.agno = sc->sm->sm_agno;
+	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+			&sc->sa.agf_bp, &sc->sa.agfl_bp);
+	if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
+		goto out;
+
+	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+
+	/* Check the AG length */
+	eoag = be32_to_cpu(agi->agi_length);
+	if (eoag != xfs_ag_block_count(mp, agno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check btree roots and levels */
+	agbno = be32_to_cpu(agi->agi_root);
+	if (!xfs_verify_agbno(mp, agno, agbno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	level = be32_to_cpu(agi->agi_level);
+	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		agbno = be32_to_cpu(agi->agi_free_root);
+		if (!xfs_verify_agbno(mp, agno, agbno))
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+		level = be32_to_cpu(agi->agi_free_level);
+		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+	}
+
+	/* Check inode counters */
+	xfs_ialloc_agino_range(mp, agno, &first_agino, &last_agino);
+	icount = be32_to_cpu(agi->agi_count);
+	if (icount > last_agino - first_agino + 1 ||
+	    icount < be32_to_cpu(agi->agi_freecount))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check inode pointers */
+	agino = be32_to_cpu(agi->agi_newino);
+	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	agino = be32_to_cpu(agi->agi_dirino);
+	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check unlinked inode buckets */
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+		agino = be32_to_cpu(agi->agi_unlinked[i]);
+		if (agino == NULLAGINO)
+			continue;
+		if (!xfs_verify_agino(mp, agno, agino))
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+	}
+
+	if (agi->agi_pad32 != cpu_to_be32(0))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+out:
+	return error;
+}

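The AGFL is a circular array, which is why both xfs_scrub_walk_agfl() and the flcount check in xfs_scrub_agf() handle a wrapped flfirst/fllast pair. Restated as a standalone helper (example_agfl_length is illustrative): with room for, say, 118 entries, flfirst = 116 and fllast = 1 cover slots 116, 117, 0 and 1, and the wrapped formula gives 118 - 116 + 1 + 1 = 4.

STATIC xfs_agblock_t
example_agfl_length(
	xfs_agblock_t		flfirst,
	xfs_agblock_t		fllast,
	xfs_agblock_t		size)	/* i.e. XFS_AGFL_SIZE(mp) */
{
	if (fllast > flfirst)			/* no wraparound */
		return fllast - flfirst + 1;
	return size - flfirst + fllast + 1;	/* wrapped past the end */
}
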
+ 102 - 0
fs/xfs/scrub/alloc.c

@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/*
+ * Set us up to scrub free space btrees.
+ */
+int
+xfs_scrub_setup_ag_allocbt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_ag_btree(sc, ip, false);
+}
+
+/* Free space btree scrubber. */
+
+/* Scrub a bnobt/cntbt record. */
+STATIC int
+xfs_scrub_allocbt_rec(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
+	xfs_agblock_t			bno;
+	xfs_extlen_t			len;
+	int				error = 0;
+
+	bno = be32_to_cpu(rec->alloc.ar_startblock);
+	len = be32_to_cpu(rec->alloc.ar_blockcount);
+
+	if (bno + len <= bno ||
+	    !xfs_verify_agbno(mp, agno, bno) ||
+	    !xfs_verify_agbno(mp, agno, bno + len - 1))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	return error;
+}
+
+/* Scrub the freespace btrees for some AG. */
+STATIC int
+xfs_scrub_allocbt(
+	struct xfs_scrub_context	*sc,
+	xfs_btnum_t			which)
+{
+	struct xfs_owner_info		oinfo;
+	struct xfs_btree_cur		*cur;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
+	return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL);
+}
+
+int
+xfs_scrub_bnobt(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO);
+}
+
+int
+xfs_scrub_cntbt(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
+}

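The "bno + len <= bno" test in xfs_scrub_allocbt_rec() above folds two checks into one: it rejects zero-length records and catches unsigned wraparound before "bno + len - 1" can alias back into the valid range. As a hypothetical standalone predicate (example_alloc_rec_ok):

STATIC bool
example_alloc_rec_ok(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	if (bno + len <= bno)		/* len == 0, or overflow */
		return false;
	/* Both endpoints must land inside this AG. */
	return xfs_verify_agbno(mp, agno, bno) &&
	       xfs_verify_agbno(mp, agno, bno + len - 1);
}
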
+ 471 - 0
fs/xfs/scrub/attr.c

@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/dabtree.h"
+#include "scrub/trace.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+/* Set us up to scrub an inode's extended attributes. */
+int
+xfs_scrub_setup_xattr(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	size_t				sz;
+
+	/*
+	 * Allocate the buffer without the inode lock held.  We need enough
+	 * space to read every xattr value in the file or enough space to
+	 * hold three copies of the xattr free space bitmap.  (Not both at
+	 * the same time.)
+	 */
+	sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
+			BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
+	sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
+	if (!sc->buf)
+		return -ENOMEM;
+
+	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+}
+
+/* Extended Attributes */
+
+struct xfs_scrub_xattr {
+	struct xfs_attr_list_context	context;
+	struct xfs_scrub_context	*sc;
+};
+
+/*
+ * Check that an extended attribute key can be looked up by hash.
+ *
+ * We use the XFS attribute list iterator (i.e. xfs_attr_list_int_ilocked)
+ * to call this function for every attribute key in an inode.  Once
+ * we're here, we load the attribute value to see if any errors happen,
+ * or if we get more or less data than we expected.
+ */
+static void
+xfs_scrub_xattr_listent(
+	struct xfs_attr_list_context	*context,
+	int				flags,
+	unsigned char			*name,
+	int				namelen,
+	int				valuelen)
+{
+	struct xfs_scrub_xattr		*sx;
+	struct xfs_da_args		args = { NULL };
+	int				error = 0;
+
+	sx = container_of(context, struct xfs_scrub_xattr, context);
+
+	if (flags & XFS_ATTR_INCOMPLETE) {
+		/* Incomplete attr key, just mark the inode for preening. */
+		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
+		return;
+	}
+
+	args.flags = ATTR_KERNOTIME;
+	if (flags & XFS_ATTR_ROOT)
+		args.flags |= ATTR_ROOT;
+	else if (flags & XFS_ATTR_SECURE)
+		args.flags |= ATTR_SECURE;
+	args.geo = context->dp->i_mount->m_attr_geo;
+	args.whichfork = XFS_ATTR_FORK;
+	args.dp = context->dp;
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(args.name, args.namelen);
+	args.trans = context->tp;
+	args.value = sx->sc->buf;
+	args.valuelen = XATTR_SIZE_MAX;
+
+	error = xfs_attr_get_ilocked(context->dp, &args);
+	if (error == -EEXIST)
+		error = 0;
+	if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
+			&error))
+		goto fail_xref;
+	if (args.valuelen != valuelen)
+		xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
+					     args.blkno);
+
+fail_xref:
+	return;
+}
+
+/*
+ * Mark a range [start, start+len) in this map.  Returns true if the
+ * region was free, and false if there's a conflict or a problem.
+ *
+ * Within a char, the lowest bit of the char represents the byte with
+ * the smallest address.
+ */
+STATIC bool
+xfs_scrub_xattr_set_map(
+	struct xfs_scrub_context	*sc,
+	unsigned long			*map,
+	unsigned int			start,
+	unsigned int			len)
+{
+	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
+	bool				ret = true;
+
+	if (start >= mapsize)
+		return false;
+	if (start + len > mapsize) {
+		len = mapsize - start;
+		ret = false;
+	}
+
+	if (find_next_bit(map, mapsize, start) < start + len)
+		ret = false;
+	bitmap_set(map, start, len);
+
+	return ret;
+}
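+
+/*
+ * An illustrative trace of the overlap test above, using hypothetical
+ * offsets:
+ *
+ *	xfs_scrub_xattr_set_map(sc, map, 0, 64);  - marks [0,64), true
+ *	xfs_scrub_xattr_set_map(sc, map, 32, 16); - returns false
+ *
+ * The second call fails because find_next_bit(map, mapsize, 32) finds
+ * bit 32 already set, which is below 32 + 16; in other words, two attr
+ * structures claimed the same bytes of the block.
+ */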
+
+/*
+ * Check the leaf freemap from the usage bitmap.  Returns false if the
+ * attr freemap has problems or points to used space.
+ */
+STATIC bool
+xfs_scrub_xattr_check_freemap(
+	struct xfs_scrub_context	*sc,
+	unsigned long			*map,
+	struct xfs_attr3_icleaf_hdr	*leafhdr)
+{
+	unsigned long			*freemap;
+	unsigned long			*dstmap;
+	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
+	int				i;
+
+	/* Construct bitmap of freemap contents. */
+	freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
+	bitmap_zero(freemap, mapsize);
+	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+		if (!xfs_scrub_xattr_set_map(sc, freemap,
+				leafhdr->freemap[i].base,
+				leafhdr->freemap[i].size))
+			return false;
+	}
+
+	/* Look for bits that are set in freemap and are marked in use. */
+	dstmap = freemap + BITS_TO_LONGS(mapsize);
+	return bitmap_and(dstmap, freemap, map, mapsize) == 0;
+}
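+
+/*
+ * Note that bitmap_and() returns nonzero if any bit survives the
+ * intersection, so the "== 0" above asserts that the freemap and the
+ * usage map are disjoint: no byte is both free and in use.
+ */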
+
+/*
+ * Check this leaf entry's relations to everything else.
+ * Adds the number of bytes used for the name/value data to *usedbytes.
+ */
+STATIC void
+xfs_scrub_xattr_entry(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	char				*buf_end,
+	struct xfs_attr_leafblock	*leaf,
+	struct xfs_attr3_icleaf_hdr	*leafhdr,
+	unsigned long			*usedmap,
+	struct xfs_attr_leaf_entry	*ent,
+	int				idx,
+	unsigned int			*usedbytes,
+	__u32				*last_hashval)
+{
+	struct xfs_mount		*mp = ds->state->mp;
+	char				*name_end;
+	struct xfs_attr_leaf_name_local	*lentry;
+	struct xfs_attr_leaf_name_remote *rentry;
+	unsigned int			nameidx;
+	unsigned int			namesize;
+
+	if (ent->pad2 != 0)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	/* Hash values in order? */
+	if (be32_to_cpu(ent->hashval) < *last_hashval)
+		xfs_scrub_da_set_corrupt(ds, level);
+	*last_hashval = be32_to_cpu(ent->hashval);
+
+	nameidx = be16_to_cpu(ent->nameidx);
+	if (nameidx < leafhdr->firstused ||
+	    nameidx >= mp->m_attr_geo->blksize) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		return;
+	}
+
+	/* Check the name information. */
+	if (ent->flags & XFS_ATTR_LOCAL) {
+		lentry = xfs_attr3_leaf_name_local(leaf, idx);
+		namesize = xfs_attr_leaf_entsize_local(lentry->namelen,
+				be16_to_cpu(lentry->valuelen));
+		name_end = (char *)lentry + namesize;
+		if (lentry->namelen == 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+	} else {
+		rentry = xfs_attr3_leaf_name_remote(leaf, idx);
+		namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
+		name_end = (char *)rentry + namesize;
+		if (rentry->namelen == 0 || rentry->valueblk == 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+	}
+	if (name_end > buf_end)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize))
+		xfs_scrub_da_set_corrupt(ds, level);
+	if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		*usedbytes += namesize;
+}
+
+/* Scrub an attribute leaf. */
+STATIC int
+xfs_scrub_xattr_block(
+	struct xfs_scrub_da_btree	*ds,
+	int				level)
+{
+	struct xfs_attr3_icleaf_hdr	leafhdr;
+	struct xfs_mount		*mp = ds->state->mp;
+	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
+	struct xfs_buf			*bp = blk->bp;
+	xfs_dablk_t			*last_checked = ds->private;
+	struct xfs_attr_leafblock	*leaf = bp->b_addr;
+	struct xfs_attr_leaf_entry	*ent;
+	struct xfs_attr_leaf_entry	*entries;
+	unsigned long			*usedmap = ds->sc->buf;
+	char				*buf_end;
+	size_t				off;
+	__u32				last_hashval = 0;
+	unsigned int			usedbytes = 0;
+	unsigned int			hdrsize;
+	int				i;
+
+	if (*last_checked == blk->blkno)
+		return 0;
+	*last_checked = blk->blkno;
+	bitmap_zero(usedmap, mp->m_attr_geo->blksize);
+
+	/* Check all the padding. */
+	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) {
+		struct xfs_attr3_leafblock	*leaf = bp->b_addr;
+
+		if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 ||
+		    leaf->hdr.info.hdr.pad != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+	} else {
+		if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+	}
+
+	/* Check the leaf header */
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
+	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
+
+	if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
+		xfs_scrub_da_set_corrupt(ds, level);
+	if (leafhdr.firstused > mp->m_attr_geo->blksize)
+		xfs_scrub_da_set_corrupt(ds, level);
+	if (leafhdr.firstused < hdrsize)
+		xfs_scrub_da_set_corrupt(ds, level);
+	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize))
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	entries = xfs_attr3_leaf_entryp(leaf);
+	if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
+	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
+		/* Mark the leaf entry itself. */
+		off = (char *)ent - (char *)leaf;
+		if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off,
+				sizeof(xfs_attr_leaf_entry_t))) {
+			xfs_scrub_da_set_corrupt(ds, level);
+			goto out;
+		}
+
+		/* Check the entry and nameval. */
+		xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
+				usedmap, ent, i, &usedbytes, &last_hashval);
+
+		if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			goto out;
+	}
+
+	if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr))
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	if (leafhdr.usedbytes != usedbytes)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+out:
+	return 0;
+}
+
+/* Scrub an attribute btree record. */
+STATIC int
+xfs_scrub_xattr_rec(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	void				*rec)
+{
+	struct xfs_mount		*mp = ds->state->mp;
+	struct xfs_attr_leaf_entry	*ent = rec;
+	struct xfs_da_state_blk		*blk;
+	struct xfs_attr_leaf_name_local	*lentry;
+	struct xfs_attr_leaf_name_remote	*rentry;
+	struct xfs_buf			*bp;
+	xfs_dahash_t			calc_hash;
+	xfs_dahash_t			hash;
+	int				nameidx;
+	int				hdrsize;
+	unsigned int			badflags;
+	int				error;
+
+	blk = &ds->state->path.blk[level];
+
+	/* Check the whole block, if necessary. */
+	error = xfs_scrub_xattr_block(ds, level);
+	if (error)
+		goto out;
+	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Check the hash of the entry. */
+	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+	if (error)
+		goto out;
+
+	/* Find the attr entry's location. */
+	bp = blk->bp;
+	hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr);
+	nameidx = be16_to_cpu(ent->nameidx);
+	if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		goto out;
+	}
+
+	/* Retrieve the entry and check it. */
+	hash = be32_to_cpu(ent->hashval);
+	badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
+			XFS_ATTR_INCOMPLETE);
+	if ((ent->flags & badflags) != 0)
+		xfs_scrub_da_set_corrupt(ds, level);
+	if (ent->flags & XFS_ATTR_LOCAL) {
+		lentry = (struct xfs_attr_leaf_name_local *)
+				(((char *)bp->b_addr) + nameidx);
+		if (lentry->namelen <= 0) {
+			xfs_scrub_da_set_corrupt(ds, level);
+			goto out;
+		}
+		calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
+	} else {
+		rentry = (struct xfs_attr_leaf_name_remote *)
+				(((char *)bp->b_addr) + nameidx);
+		if (rentry->namelen <= 0) {
+			xfs_scrub_da_set_corrupt(ds, level);
+			goto out;
+		}
+		calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
+	}
+	if (calc_hash != hash)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+out:
+	return error;
+}
+
+/* Scrub the extended attribute metadata. */
+int
+xfs_scrub_xattr(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_scrub_xattr		sx;
+	struct attrlist_cursor_kern	cursor = { 0 };
+	xfs_dablk_t			last_checked = -1U;
+	int				error = 0;
+
+	if (!xfs_inode_hasattr(sc->ip))
+		return -ENOENT;
+
+	memset(&sx, 0, sizeof(sx));
+	/* Check attribute tree structure */
+	error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec,
+			&last_checked);
+	if (error)
+		goto out;
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Check that every attr key can also be looked up by hash. */
+	sx.context.dp = sc->ip;
+	sx.context.cursor = &cursor;
+	sx.context.resynch = 1;
+	sx.context.put_listent = xfs_scrub_xattr_listent;
+	sx.context.tp = sc->tp;
+	sx.context.flags = ATTR_INCOMPLETE;
+	sx.sc = sc;
+
+	/*
+	 * Look up every xattr in this file by name.
+	 *
+	 * Use the backend implementation of xfs_attr_list to call
+	 * xfs_scrub_xattr_listent on every attribute key in this inode.
+	 * In other words, we use the same iterator/callback mechanism
+	 * that listattr uses to scrub extended attributes, though in our
+	 * _listent function, we check the value of the attribute.
+	 *
+	 * The VFS only locks i_rwsem when modifying attrs, so keep all
+	 * three locks held because that's the only way to ensure we're
+	 * the only thread poking into the da btree.  We traverse the da
+	 * btree while holding a leaf buffer locked for the xattr name
+	 * iteration, which doesn't really follow the usual buffer
+	 * locking order.
+	 */
+	error = xfs_attr_list_int_ilocked(&sx.context);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
+		goto out;
+out:
+	return error;
+}

+ 363 - 0
fs/xfs/scrub/bmap.c

@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/* Set us up with an inode's bmap. */
+int
+xfs_scrub_setup_inode_bmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	error = xfs_scrub_get_inode(sc, ip);
+	if (error)
+		goto out;
+
+	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	/*
+	 * We don't want any ephemeral data fork updates sitting around
+	 * while we inspect block mappings, so wait for directio to finish
+	 * and flush dirty data if we have delalloc reservations.
+	 */
+	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
+	    sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
+		inode_dio_wait(VFS_I(sc->ip));
+		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
+		if (error)
+			goto out;
+	}
+
+	/* Got the inode; allocate a transaction and finish locking. */
+	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	if (error)
+		goto out;
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+out:
+	/* scrub teardown will unlock and release the inode */
+	return error;
+}
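+
+/*
+ * Note the two-stage locking above: the IOLOCK and MMAPLOCK are taken
+ * first, and the ILOCK only after the (empty) transaction has been
+ * allocated.  This mirrors the usual XFS convention that transaction
+ * allocation happens before the ILOCK is acquired.
+ */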
+
+/*
+ * Inode fork block mapping (BMBT) scrubber.
+ * More complex than the others because we have to scrub
+ * all the extents regardless of whether or not the fork
+ * is in btree format.
+ */
+
+struct xfs_scrub_bmap_info {
+	struct xfs_scrub_context	*sc;
+	xfs_fileoff_t			lastoff;
+	bool				is_rt;
+	bool				is_shared;
+	int				whichfork;
+};
+
+/* Scrub a single extent record. */
+STATIC int
+xfs_scrub_bmap_extent(
+	struct xfs_inode		*ip,
+	struct xfs_btree_cur		*cur,
+	struct xfs_scrub_bmap_info	*info,
+	struct xfs_bmbt_irec		*irec)
+{
+	struct xfs_mount		*mp = info->sc->mp;
+	struct xfs_buf			*bp = NULL;
+	int				error = 0;
+
+	if (cur)
+		xfs_btree_get_block(cur, 0, &bp);
+
+	/*
+	 * Check for out-of-order extents.  This record could have come
+	 * from the incore list, for which there is no ordering check.
+	 */
+	if (irec->br_startoff < info->lastoff)
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* There should never be a "hole" extent in either extent list. */
+	if (irec->br_startblock == HOLESTARTBLOCK)
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/*
+	 * Check for delalloc extents.  The incore extent walk skips them,
+	 * and they should never appear in the bmbt either.
+	 */
+	if (isnullstartblock(irec->br_startblock))
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* Make sure the extent points to a valid place. */
+	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+	if (info->is_rt &&
+	    (!xfs_verify_rtbno(mp, irec->br_startblock) ||
+	     !xfs_verify_rtbno(mp, irec->br_startblock +
+				irec->br_blockcount - 1)))
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+	if (!info->is_rt &&
+	    (!xfs_verify_fsbno(mp, irec->br_startblock) ||
+	     !xfs_verify_fsbno(mp, irec->br_startblock +
+				irec->br_blockcount - 1)))
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* We don't allow unwritten extents on attr forks. */
+	if (irec->br_state == XFS_EXT_UNWRITTEN &&
+	    info->whichfork == XFS_ATTR_FORK)
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	info->lastoff = irec->br_startoff + irec->br_blockcount;
+	return error;
+}
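+
+/*
+ * The "points to a valid place" test above is an unsigned wraparound
+ * check.  With hypothetical values br_startblock = 0xfffffffffffffff0
+ * and br_blockcount = 0x20, the sum wraps to 0x10, which is <= the
+ * start block, so the mapping is flagged corrupt before the rt/fsbno
+ * verifiers are even consulted.
+ */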
+
+/* Scrub a bmbt record. */
+STATIC int
+xfs_scrub_bmapbt_rec(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_info	*info = bs->private;
+	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_btree_block		*block;
+	uint64_t			owner;
+	int				i;
+
+	/*
+	 * Check the owners of the btree blocks up to the level below
+	 * the root since the verifiers don't do that.
+	 */
+	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
+	    bs->cur->bc_ptrs[0] == 1) {
+		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
+			block = xfs_btree_get_block(bs->cur, i, &bp);
+			owner = be64_to_cpu(block->bb_u.l.bb_owner);
+			if (owner != ip->i_ino)
+				xfs_scrub_fblock_set_corrupt(bs->sc,
+						info->whichfork, 0);
+		}
+	}
+
+	/* Set up the in-core record and scrub it. */
+	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
+}
+
+/* Scan the btree records. */
+STATIC int
+xfs_scrub_bmap_btree(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	struct xfs_scrub_bmap_info	*info)
+{
+	struct xfs_owner_info		oinfo;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_btree_cur		*cur;
+	int				error;
+
+	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
+	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+	error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info);
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+					  XFS_BTREE_NOERROR);
+	return error;
+}
+
+/*
+ * Scrub an inode fork's block mappings.
+ *
+ * First we scan every record in every btree block, if applicable.
+ * Then we unconditionally scan the incore extent cache.
+ */
+STATIC int
+xfs_scrub_bmap(
+	struct xfs_scrub_context	*sc,
+	int				whichfork)
+{
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_info	info = { NULL };
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_ifork		*ifp;
+	xfs_fileoff_t			endoff;
+	struct xfs_iext_cursor		icur;
+	bool				found;
+	int				error = 0;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+
+	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
+	info.whichfork = whichfork;
+	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
+	info.sc = sc;
+
+	switch (whichfork) {
+	case XFS_COW_FORK:
+		/* Non-existent CoW forks are ignorable. */
+		if (!ifp)
+			goto out;
+		/* No CoW forks on non-reflink inodes/filesystems. */
+		if (!xfs_is_reflink_inode(ip)) {
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+			goto out;
+		}
+		break;
+	case XFS_ATTR_FORK:
+		if (!ifp)
+			goto out;
+		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
+		    !xfs_sb_version_hasattr2(&mp->m_sb))
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		break;
+	default:
+		ASSERT(whichfork == XFS_DATA_FORK);
+		break;
+	}
+
+	/* Check the fork values */
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_UUID:
+	case XFS_DINODE_FMT_DEV:
+	case XFS_DINODE_FMT_LOCAL:
+		/* No mappings to check. */
+		goto out;
+	case XFS_DINODE_FMT_EXTENTS:
+		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+			goto out;
+		}
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (whichfork == XFS_COW_FORK) {
+			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+			goto out;
+		}
+
+		error = xfs_scrub_bmap_btree(sc, whichfork, &info);
+		if (error)
+			goto out;
+		break;
+	default:
+		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+		goto out;
+	}
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Now try to scrub the in-memory extent list. */
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(sc->tp, ip, whichfork);
+		if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
+			goto out;
+	}
+
+	/* Find the offset of the last extent in the mapping. */
+	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
+	if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
+		goto out;
+
+	/* Scrub extent records. */
+	info.lastoff = 0;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec);
+	     found != 0;
+	     found = xfs_iext_next_extent(ifp, &icur, &irec)) {
+		if (xfs_scrub_should_terminate(sc, &error))
+			break;
+		if (isnullstartblock(irec.br_startblock))
+			continue;
+		if (irec.br_startoff >= endoff) {
+			xfs_scrub_fblock_set_corrupt(sc, whichfork,
+					irec.br_startoff);
+			goto out;
+		}
+		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
+		if (error)
+			goto out;
+	}
+
+out:
+	return error;
+}
+
+/* Scrub an inode's data fork. */
+int
+xfs_scrub_bmap_data(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_bmap(sc, XFS_DATA_FORK);
+}
+
+/* Scrub an inode's attr fork. */
+int
+xfs_scrub_bmap_attr(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
+}
+
+/* Scrub an inode's CoW fork. */
+int
+xfs_scrub_bmap_cow(
+	struct xfs_scrub_context	*sc)
+{
+	if (!xfs_is_reflink_inode(sc->ip))
+		return -ENOENT;
+
+	return xfs_scrub_bmap(sc, XFS_COW_FORK);
+}

+ 516 - 0
fs/xfs/scrub/btree.c

@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/* btree scrubbing */
+
+/*
+ * Check for btree operation errors.  See the section about handling
+ * operational errors in common.c.
+ */
+bool
+xfs_scrub_btree_process_error(
+	struct xfs_scrub_context	*sc,
+	struct xfs_btree_cur		*cur,
+	int				level,
+	int				*error)
+{
+	if (*error == 0)
+		return true;
+
+	switch (*error) {
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+			trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
+					*error, __return_address);
+		else
+			trace_xfs_scrub_btree_op_error(sc, cur, level,
+					*error, __return_address);
+		break;
+	}
+	return false;
+}
+
+/* Record btree block corruption. */
+void
+xfs_scrub_btree_set_corrupt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_btree_cur		*cur,
+	int				level)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+
+	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+		trace_xfs_scrub_ifork_btree_error(sc, cur, level,
+				__return_address);
+	else
+		trace_xfs_scrub_btree_error(sc, cur, level,
+				__return_address);
+}
+
+/*
+ * Make sure this record is in order and doesn't stray outside of the parent
+ * keys.
+ */
+STATIC void
+xfs_scrub_btree_rec(
+	struct xfs_scrub_btree	*bs)
+{
+	struct xfs_btree_cur	*cur = bs->cur;
+	union xfs_btree_rec	*rec;
+	union xfs_btree_key	key;
+	union xfs_btree_key	hkey;
+	union xfs_btree_key	*keyp;
+	struct xfs_btree_block	*block;
+	struct xfs_btree_block	*keyblock;
+	struct xfs_buf		*bp;
+
+	block = xfs_btree_get_block(cur, 0, &bp);
+	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+
+	trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
+
+	/* If this isn't the first record, are they in order? */
+	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
+	bs->firstrec = false;
+	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
+
+	if (cur->bc_nlevels == 1)
+		return;
+
+	/* Is this at least as large as the parent low key? */
+	cur->bc_ops->init_key_from_rec(&key, rec);
+	keyblock = xfs_btree_get_block(cur, 1, &bp);
+	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
+	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+
+	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+		return;
+
+	/* Is this no larger than the parent high key? */
+	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
+	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
+	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+}
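+
+/*
+ * Note that the low key check above runs for every btree, while the
+ * high key check is gated on XFS_BTREE_OVERLAPPING, which (as of this
+ * series) only the reverse-mapping btree sets: rmap records may
+ * overlap, so each parent entry carries a [low key, high key] pair and
+ * both diff_two_keys() tests are needed to assert low <= rec <= high.
+ */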
+
+/*
+ * Make sure this key is in order and doesn't stray outside of the parent
+ * keys.
+ */
+STATIC void
+xfs_scrub_btree_key(
+	struct xfs_scrub_btree	*bs,
+	int			level)
+{
+	struct xfs_btree_cur	*cur = bs->cur;
+	union xfs_btree_key	*key;
+	union xfs_btree_key	*keyp;
+	struct xfs_btree_block	*block;
+	struct xfs_btree_block	*keyblock;
+	struct xfs_buf		*bp;
+
+	block = xfs_btree_get_block(cur, level, &bp);
+	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
+
+	trace_xfs_scrub_btree_key(bs->sc, cur, level);
+
+	/* If this isn't the first key, are they in order? */
+	if (!bs->firstkey[level] &&
+	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+	bs->firstkey[level] = false;
+	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
+
+	if (level + 1 >= cur->bc_nlevels)
+		return;
+
+	/* Is this at least as large as the parent low key? */
+	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
+	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
+	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+
+	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+		return;
+
+	/* Is this no larger than the parent high key? */
+	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
+	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
+	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+}
+
+/*
+ * Check a btree pointer.  Returns true if it's ok to use this pointer.
+ * Callers do not need to set the corrupt flag.
+ */
+static bool
+xfs_scrub_btree_ptr_ok(
+	struct xfs_scrub_btree		*bs,
+	int				level,
+	union xfs_btree_ptr		*ptr)
+{
+	bool				res;
+
+	/* A btree rooted in an inode has no block pointer to the root. */
+	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+	    level == bs->cur->bc_nlevels)
+		return true;
+
+	/* Otherwise, check the pointers. */
+	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
+	else
+		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
+	if (!res)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+
+	return res;
+}
+
+/* Check that a btree block's sibling matches what we expect. */
+STATIC int
+xfs_scrub_btree_block_check_sibling(
+	struct xfs_scrub_btree		*bs,
+	int				level,
+	int				direction,
+	union xfs_btree_ptr		*sibling)
+{
+	struct xfs_btree_cur		*cur = bs->cur;
+	struct xfs_btree_block		*pblock;
+	struct xfs_buf			*pbp;
+	struct xfs_btree_cur		*ncur = NULL;
+	union xfs_btree_ptr		*pp;
+	int				success;
+	int				error;
+
+	error = xfs_btree_dup_cursor(cur, &ncur);
+	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
+	    !ncur)
+		return error;
+
+	/*
+	 * If the pointer is null, we shouldn't be able to move the upper
+	 * level pointer anywhere.
+	 */
+	if (xfs_btree_ptr_is_null(cur, sibling)) {
+		if (direction > 0)
+			error = xfs_btree_increment(ncur, level + 1, &success);
+		else
+			error = xfs_btree_decrement(ncur, level + 1, &success);
+		if (error == 0 && success)
+			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		error = 0;
+		goto out;
+	}
+
+	/* Increment upper level pointer. */
+	if (direction > 0)
+		error = xfs_btree_increment(ncur, level + 1, &success);
+	else
+		error = xfs_btree_decrement(ncur, level + 1, &success);
+	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
+		goto out;
+	if (!success) {
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
+		goto out;
+	}
+
+	/* Compare upper level pointer to sibling pointer. */
+	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
+	if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
+		goto out;
+
+	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+out:
+	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+	return error;
+}
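+
+/*
+ * The direction argument is -1 for the left sibling and +1 for the
+ * right, matching the two calls in
+ * xfs_scrub_btree_block_check_siblings() below; a null sibling pointer
+ * is verified by proving that the duplicated parent cursor cannot step
+ * that way either.
+ */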
+
+/* Check the siblings of a btree block. */
+STATIC int
+xfs_scrub_btree_block_check_siblings(
+	struct xfs_scrub_btree		*bs,
+	struct xfs_btree_block		*block)
+{
+	struct xfs_btree_cur		*cur = bs->cur;
+	union xfs_btree_ptr		leftsib;
+	union xfs_btree_ptr		rightsib;
+	int				level;
+	int				error = 0;
+
+	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
+	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
+	level = xfs_btree_get_level(block);
+
+	/* Root block should never have siblings. */
+	if (level == cur->bc_nlevels - 1) {
+		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
+		    !xfs_btree_ptr_is_null(cur, &rightsib))
+			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		goto out;
+	}
+
+	/*
+	 * Do the left and right sibling pointers match the adjacent
+	 * parent level pointers?
+	 * (These functions absorb error codes for us.)
+	 */
+	error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
+	if (error)
+		return error;
+	error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
+	if (error)
+		return error;
+out:
+	return error;
+}
+
+/*
+ * Grab and scrub a btree block given a btree pointer.  Returns block
+ * and buffer pointers (if applicable) if they're ok to use.
+ */
+STATIC int
+xfs_scrub_btree_get_block(
+	struct xfs_scrub_btree		*bs,
+	int				level,
+	union xfs_btree_ptr		*pp,
+	struct xfs_btree_block		**pblock,
+	struct xfs_buf			**pbp)
+{
+	void				*failed_at;
+	int				error;
+
+	*pblock = NULL;
+	*pbp = NULL;
+
+	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
+	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
+	    !*pblock)
+		return error;
+
+	xfs_btree_get_block(bs->cur, level, pbp);
+	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
+				level, *pbp);
+	else
+		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
+				 level, *pbp);
+	if (failed_at) {
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+		return 0;
+	}
+
+	/*
+	 * Check the block's siblings; this function absorbs error codes
+	 * for us.
+	 */
+	return xfs_scrub_btree_block_check_siblings(bs, *pblock);
+}
+
+/*
+ * Check that the low and high keys of this block match the keys stored
+ * in the parent block.
+ */
+STATIC void
+xfs_scrub_btree_block_keys(
+	struct xfs_scrub_btree		*bs,
+	int				level,
+	struct xfs_btree_block		*block)
+{
+	union xfs_btree_key		block_keys;
+	struct xfs_btree_cur		*cur = bs->cur;
+	union xfs_btree_key		*high_bk;
+	union xfs_btree_key		*parent_keys;
+	union xfs_btree_key		*high_pk;
+	struct xfs_btree_block		*parent_block;
+	struct xfs_buf			*bp;
+
+	if (level >= cur->bc_nlevels - 1)
+		return;
+
+	/* Calculate the keys for this block. */
+	xfs_btree_get_keys(cur, block, &block_keys);
+
+	/* Obtain the parent's copy of the keys for this block. */
+	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
+	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
+			parent_block);
+
+	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+
+	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+		return;
+
+	/* Get high keys */
+	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
+	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
+			parent_block);
+
+	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+}
+
+/*
+ * Visit all nodes and leaves of a btree.  Check that all pointers and
+ * records are in order, that the keys reflect the records, and use a callback
+ * so that the caller can verify individual records.
+ */
+int
+xfs_scrub_btree(
+	struct xfs_scrub_context	*sc,
+	struct xfs_btree_cur		*cur,
+	xfs_scrub_btree_rec_fn		scrub_fn,
+	struct xfs_owner_info		*oinfo,
+	void				*private)
+{
+	struct xfs_scrub_btree		bs = { NULL };
+	union xfs_btree_ptr		ptr;
+	union xfs_btree_ptr		*pp;
+	union xfs_btree_rec		*recp;
+	struct xfs_btree_block		*block;
+	int				level;
+	struct xfs_buf			*bp;
+	int				i;
+	int				error = 0;
+
+	/* Initialize scrub state */
+	bs.cur = cur;
+	bs.scrub_rec = scrub_fn;
+	bs.oinfo = oinfo;
+	bs.firstrec = true;
+	bs.private = private;
+	bs.sc = sc;
+	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
+		bs.firstkey[i] = true;
+	INIT_LIST_HEAD(&bs.to_check);
+
+	/* Don't try to check a tree with a height we can't handle. */
+	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
+		xfs_scrub_btree_set_corrupt(sc, cur, 0);
+		goto out;
+	}
+
+	/*
+	 * Load the root of the btree.  The helper function absorbs
+	 * error codes for us.
+	 */
+	level = cur->bc_nlevels - 1;
+	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+	if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
+		goto out;
+	error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
+	if (error || !block)
+		goto out;
+
+	cur->bc_ptrs[level] = 1;
+
+	while (level < cur->bc_nlevels) {
+		block = xfs_btree_get_block(cur, level, &bp);
+
+		if (level == 0) {
+			/* End of leaf, pop back towards the root. */
+			if (cur->bc_ptrs[level] >
+			    be16_to_cpu(block->bb_numrecs)) {
+				xfs_scrub_btree_block_keys(&bs, level, block);
+				if (level < cur->bc_nlevels - 1)
+					cur->bc_ptrs[level + 1]++;
+				level++;
+				continue;
+			}
+
+			/* Records in order for scrub? */
+			xfs_scrub_btree_rec(&bs);
+
+			/* Call out to the record checker. */
+			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+			error = bs.scrub_rec(&bs, recp);
+			if (error)
+				break;
+			if (xfs_scrub_should_terminate(sc, &error) ||
+			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+				break;
+
+			cur->bc_ptrs[level]++;
+			continue;
+		}
+
+		/* End of node, pop back towards the root. */
+		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
+			xfs_scrub_btree_block_keys(&bs, level, block);
+			if (level < cur->bc_nlevels - 1)
+				cur->bc_ptrs[level + 1]++;
+			level++;
+			continue;
+		}
+
+		/* Keys in order for scrub? */
+		xfs_scrub_btree_key(&bs, level);
+
+		/* Drill another level deeper. */
+		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
+		if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
+			cur->bc_ptrs[level]++;
+			continue;
+		}
+		level--;
+		error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
+		if (error || !block)
+			goto out;
+
+		cur->bc_ptrs[level] = 1;
+	}
+
+out:
+	return error;
+}
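+
+/*
+ * Schematically, the loop above is an iterative depth-first walk that
+ * reuses cur->bc_ptrs[] as its stack, where bc_ptrs[level] is the
+ * 1-based position within the block at that level:
+ *
+ *	bc_ptrs[root] = 1;
+ *	while (level < nlevels):
+ *		past the last entry? -> verify block keys, pop a level
+ *		at the leaf level?   -> check order, call scrub_rec, advance
+ *		otherwise            -> check key, validate ptr, descend
+ *
+ * so every key and record is visited exactly once, in tree order.
+ */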

+ 57 - 0
fs/xfs/scrub/btree.h

@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_BTREE_H__
+#define __XFS_SCRUB_BTREE_H__
+
+/* btree scrub */
+
+/* Check for btree operation errors. */
+bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
+		struct xfs_btree_cur *cur, int level, int *error);
+
+/* Check for btree corruption. */
+void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
+		struct xfs_btree_cur *cur, int level);
+
+struct xfs_scrub_btree;
+typedef int (*xfs_scrub_btree_rec_fn)(
+	struct xfs_scrub_btree	*bs,
+	union xfs_btree_rec	*rec);
+
+struct xfs_scrub_btree {
+	/* caller-provided scrub state */
+	struct xfs_scrub_context	*sc;
+	struct xfs_btree_cur		*cur;
+	xfs_scrub_btree_rec_fn		scrub_rec;
+	struct xfs_owner_info		*oinfo;
+	void				*private;
+
+	/* internal scrub state */
+	union xfs_btree_rec		lastrec;
+	bool				firstrec;
+	union xfs_btree_key		lastkey[XFS_BTREE_MAXLEVELS];
+	bool				firstkey[XFS_BTREE_MAXLEVELS];
+	struct list_head		to_check;
+};
+int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		    xfs_scrub_btree_rec_fn scrub_fn,
+		    struct xfs_owner_info *oinfo, void *private);
+
+#endif /* __XFS_SCRUB_BTREE_H__ */

+ 574 - 0
fs/xfs/scrub/common.c

@@ -0,0 +1,574 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/btree.h"
+
+/* Common code for the metadata scrubbers. */
+
+/*
+ * Handling operational errors.
+ *
+ * The *_process_error() family of functions is used to process error return
+ * codes from functions called as part of a scrub operation.
+ *
+ * If there's no error, we return true to tell the caller that it's ok
+ * to move on to the next check in its list.
+ *
+ * For non-verifier errors (e.g. ENOMEM) we return false to tell the
+ * caller that something bad happened, and we preserve *error so that
+ * the caller can return the *error up the stack to userspace.
+ *
+ * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
+ * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
+ * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
+ * not via return codes.  We return false to tell the caller that
+ * something bad happened.  Since the error has been cleared, the caller
+ * will (presumably) return that zero and scrubbing will move on to
+ * whatever's next.
+ *
+ * ftrace can be used to record the precise metadata location and the
+ * approximate code location of the failed operation.
+ */
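+
+/*
+ * A sketch of the resulting call pattern (the callee here is
+ * illustrative; the shape matches the callers later in this patch):
+ *
+ *	error = xfs_ialloc_read_agi(mp, sc->tp, agno, &bp);
+ *	if (!xfs_scrub_process_error(sc, agno, bno, &error))
+ *		return error;
+ *
+ * On success this falls through to the next check; on a verifier error
+ * the check function returns zero with OFLAG_CORRUPT set, so scrub
+ * carries on elsewhere; on any other error the original errno is
+ * handed back to userspace.
+ */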
+
+/* Check for operational errors. */
+bool
+xfs_scrub_process_error(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	xfs_agblock_t			bno,
+	int				*error)
+{
+	switch (*error) {
+	case 0:
+		return true;
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		trace_xfs_scrub_op_error(sc, agno, bno, *error,
+				__return_address);
+		break;
+	}
+	return false;
+}
+
+/* Check for operational errors for a file offset. */
+bool
+xfs_scrub_fblock_process_error(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	int				*error)
+{
+	switch (*error) {
+	case 0:
+		return true;
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
+				__return_address);
+		break;
+	}
+	return false;
+}
+
+/*
+ * Handling scrub corruption/optimization/warning checks.
+ *
+ * The *_set_{corrupt,preen,warning}() family of functions are used to
+ * record the presence of metadata that is incorrect (corrupt), could be
+ * optimized somehow (preen), or should be flagged for administrative
+ * review but is not incorrect (warn).
+ *
+ * ftrace can be used to record the precise metadata location and
+ * approximate code location of the failed check.
+ */
+
+/* Record a block which could be optimized. */
+void
+xfs_scrub_block_set_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
+	trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
+}
+
+/*
+ * Record an inode which could be optimized.  The trace data will
+ * include the block given by bp if bp is given; otherwise it will use
+ * the block location of the inode record itself.
+ */
+void
+xfs_scrub_ino_set_preen(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
+	trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
+			__return_address);
+}
+
+/* Record a corrupt block. */
+void
+xfs_scrub_block_set_corrupt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+}
+
+/*
+ * Record a corrupt inode.  The trace data will include the block given
+ * by bp if bp is given; otherwise it will use the block location of the
+ * inode record itself.
+ */
+void
+xfs_scrub_ino_set_corrupt(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+	trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+}
+
+/* Record corruption in a block indexed by a file fork. */
+void
+xfs_scrub_fblock_set_corrupt(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+}
+
+/*
+ * Warn about inodes that need administrative review but are not
+ * incorrect.
+ */
+void
+xfs_scrub_ino_set_warning(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
+	trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
+			__return_address);
+}
+
+/* Warn about a block indexed by a file fork that needs review. */
+void
+xfs_scrub_fblock_set_warning(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
+	trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
+}
+
+/* Signal an incomplete scrub. */
+void
+xfs_scrub_set_incomplete(
+	struct xfs_scrub_context	*sc)
+{
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
+	trace_xfs_scrub_incomplete(sc, __return_address);
+}
+
+/*
+ * AG scrubbing
+ *
+ * These helpers facilitate locking an allocation group's header
+ * buffers, setting up cursors for all btrees that are present, and
+ * cleaning everything up once we're through.
+ */
+
+/* Decide if we want to return an AG header read failure. */
+static inline bool
+want_ag_read_header_failure(
+	struct xfs_scrub_context	*sc,
+	unsigned int			type)
+{
+	/* Return all AG header read failures when scanning btrees. */
+	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
+	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
+	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
+		return true;
+	/*
+	 * If we're scanning a given type of AG header, we only want to
+	 * see read failures from that specific header.  We'd like to have
+	 * the other headers available for cross-checking, but that isn't
+	 * required.
+	 */
+	if (sc->sm->sm_type == type)
+		return true;
+	return false;
+}
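+
+/*
+ * For example, if sm_type is XFS_SCRUB_TYPE_AGF, a failed AGI or AGFL
+ * read is quietly dropped (we still take whatever cross-checking we
+ * can get) while a failed AGF read is returned; for any non-header
+ * scrub type, all three read failures are returned.
+ */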
+
+/*
+ * Grab all the headers for an AG.
+ *
+ * The headers should be released by xfs_scrub_ag_free, but as a
+ * failsafe we attach all the buffers we grab to the scrub transaction so
+ * they'll all be freed when we cancel it.
+ */
+int
+xfs_scrub_ag_read_headers(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	struct xfs_buf			**agi,
+	struct xfs_buf			**agf,
+	struct xfs_buf			**agfl)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
+	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
+		goto out;
+
+	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
+	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
+		goto out;
+
+	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
+	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
+		goto out;
+
+out:
+	return error;
+}
+
+/* Release all the AG btree cursors. */
+void
+xfs_scrub_ag_btcur_free(
+	struct xfs_scrub_ag		*sa)
+{
+	if (sa->refc_cur)
+		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
+	if (sa->rmap_cur)
+		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
+	if (sa->fino_cur)
+		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
+	if (sa->ino_cur)
+		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
+	if (sa->cnt_cur)
+		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
+	if (sa->bno_cur)
+		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
+
+	sa->refc_cur = NULL;
+	sa->rmap_cur = NULL;
+	sa->fino_cur = NULL;
+	sa->ino_cur = NULL;
+	sa->bno_cur = NULL;
+	sa->cnt_cur = NULL;
+}
+
+/* Initialize all the btree cursors for an AG. */
+int
+xfs_scrub_ag_btcur_init(
+	struct xfs_scrub_context	*sc,
+	struct xfs_scrub_ag		*sa)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_agnumber_t			agno = sa->agno;
+
+	if (sa->agf_bp) {
+		/* Set up a bnobt cursor for cross-referencing. */
+		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
+				agno, XFS_BTNUM_BNO);
+		if (!sa->bno_cur)
+			goto err;
+
+		/* Set up a cntbt cursor for cross-referencing. */
+		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
+				agno, XFS_BTNUM_CNT);
+		if (!sa->cnt_cur)
+			goto err;
+	}
+
+	/* Set up an inobt cursor for cross-referencing. */
+	if (sa->agi_bp) {
+		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
+					agno, XFS_BTNUM_INO);
+		if (!sa->ino_cur)
+			goto err;
+	}
+
+	/* Set up a finobt cursor for cross-referencing. */
+	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
+				agno, XFS_BTNUM_FINO);
+		if (!sa->fino_cur)
+			goto err;
+	}
+
+	/* Set up a rmapbt cursor for cross-referencing. */
+	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
+				agno);
+		if (!sa->rmap_cur)
+			goto err;
+	}
+
+	/* Set up a refcountbt cursor for cross-referencing. */
+	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
+		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
+				sa->agf_bp, agno, NULL);
+		if (!sa->refc_cur)
+			goto err;
+	}
+
+	return 0;
+err:
+	return -ENOMEM;
+}
+
+/* Release the AG header context and btree cursors. */
+void
+xfs_scrub_ag_free(
+	struct xfs_scrub_context	*sc,
+	struct xfs_scrub_ag		*sa)
+{
+	xfs_scrub_ag_btcur_free(sa);
+	if (sa->agfl_bp) {
+		xfs_trans_brelse(sc->tp, sa->agfl_bp);
+		sa->agfl_bp = NULL;
+	}
+	if (sa->agf_bp) {
+		xfs_trans_brelse(sc->tp, sa->agf_bp);
+		sa->agf_bp = NULL;
+	}
+	if (sa->agi_bp) {
+		xfs_trans_brelse(sc->tp, sa->agi_bp);
+		sa->agi_bp = NULL;
+	}
+	sa->agno = NULLAGNUMBER;
+}
+
+/*
+ * For scrub, grab the AGI and the AGF headers, in that order.  Locking
+ * order requires us to get the AGI before the AGF.  We use the
+ * transaction to avoid deadlocking on crosslinked metadata buffers;
+ * either the caller passes one in (bmap scrub) or we have to create a
+ * transaction ourselves.
+ */
+int
+xfs_scrub_ag_init(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	struct xfs_scrub_ag		*sa)
+{
+	int				error;
+
+	sa->agno = agno;
+	error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
+			&sa->agf_bp, &sa->agfl_bp);
+	if (error)
+		return error;
+
+	return xfs_scrub_ag_btcur_init(sc, sa);
+}
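+
+/*
+ * A typical pairing, sketched from the helpers above (the scrubber
+ * body here is hypothetical):
+ *
+ *	error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ *	if (error)
+ *		return error;
+ *	(cross-reference against sc->sa.bno_cur, ino_cur, rmap_cur, ...)
+ *	xfs_scrub_ag_free(sc, &sc->sa);
+ *
+ * xfs_scrub_ag_free() is safe even after a partial cursor setup since
+ * every pointer it releases is checked for NULL first.
+ */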
+
+/* Per-scrubber setup functions */
+
+/* Set us up with a transaction and an empty context. */
+int
+xfs_scrub_setup_fs(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
+}
+
+/* Set us up with AG headers and btree cursors. */
+int
+xfs_scrub_setup_ag_btree(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	bool				force_log)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	/*
+	 * If the caller asks us to checkpoint the log, do so.  This
+	 * expensive operation should be performed infrequently and only
+	 * as a last resort.  Any caller that sets force_log should
+	 * document why they need to do so.
+	 */
+	if (force_log) {
+		error = xfs_scrub_checkpoint_log(mp);
+		if (error)
+			return error;
+	}
+
+	error = xfs_scrub_setup_ag_header(sc, ip);
+	if (error)
+		return error;
+
+	return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
+}
+
+/* Push everything out of the log onto disk. */
+int
+xfs_scrub_checkpoint_log(
+	struct xfs_mount	*mp)
+{
+	int			error;
+
+	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+	if (error)
+		return error;
+	xfs_ail_push_all_sync(mp->m_ail);
+	return 0;
+}
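+
+/*
+ * This is the standard two-step quiesce: force the log to disk, then
+ * push the AIL and wait so that everything the log covered has also
+ * been written back in place.
+ */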
+
+/*
+ * Given an inode and the scrub control structure, grab either the
+ * inode referenced in the control structure or the inode passed in.
+ * The inode is not locked.
+ */
+int
+xfs_scrub_get_inode(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip_in)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip = NULL;
+	int				error;
+
+	/*
+	 * If userspace passed us an AG number or a generation number
+	 * without an inode number, they haven't got a clue so bail out
+	 * immediately.
+	 */
+	if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
+		return -EINVAL;
+
+	/* We want to scan the inode we already had opened. */
+	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
+		sc->ip = ip_in;
+		return 0;
+	}
+
+	/* Look up the inode, see if the generation number matches. */
+	if (xfs_internal_inum(mp, sc->sm->sm_ino))
+		return -ENOENT;
+	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
+			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
+	if (error == -ENOENT || error == -EINVAL) {
+		/* inode doesn't exist... */
+		return -ENOENT;
+	} else if (error) {
+		trace_xfs_scrub_op_error(sc,
+				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
+				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
+				error, __return_address);
+		return error;
+	}
+	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
+		iput(VFS_I(ip));
+		return -ENOENT;
+	}
+
+	sc->ip = ip;
+	return 0;
+}
+
+/* Set us up to scrub a file's contents. */
+int
+xfs_scrub_setup_inode_contents(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	unsigned int			resblks)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	error = xfs_scrub_get_inode(sc, ip);
+	if (error)
+		return error;
+
+	/* Got the inode, lock it and we're ready to go. */
+	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	if (error)
+		goto out;
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+out:
+	/* scrub teardown will unlock and release the inode for us */
+	return error;
+}

+ 144 - 0
fs/xfs/scrub/common.h

@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_COMMON_H__
+#define __XFS_SCRUB_COMMON_H__
+
+/*
+ * We /could/ terminate a scrub/repair operation early.  If we're not
+ * in a good place to continue (fatal signal, etc.) then bail out.
+ * Note that we're careful not to make any judgements about *error.
+ */
+static inline bool
+xfs_scrub_should_terminate(
+	struct xfs_scrub_context	*sc,
+	int				*error)
+{
+	if (fatal_signal_pending(current)) {
+		if (*error == 0)
+			*error = -EAGAIN;
+		return true;
+	}
+	return false;
+}
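+
+/*
+ * Callers poll this from long-running loops, e.g. the extent walk in
+ * bmap.c above:
+ *
+ *	if (xfs_scrub_should_terminate(sc, &error))
+ *		break;
+ *
+ * If a fatal signal is pending and *error is still zero, the loop
+ * exits with -EAGAIN so userspace can retry after handling the signal.
+ */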
+
+/*
+ * Grab an empty transaction so that we can re-grab locked buffers if
+ * one of our btrees turns out to be cyclic.
+ */
+static inline int
+xfs_scrub_trans_alloc(
+	struct xfs_scrub_metadata	*sm,
+	struct xfs_mount		*mp,
+	struct xfs_trans		**tpp)
+{
+	return xfs_trans_alloc_empty(mp, tpp);
+}
+
+bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+		xfs_agblock_t bno, int *error);
+bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
+		xfs_fileoff_t offset, int *error);
+
+void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
+		struct xfs_buf *bp);
+void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
+		struct xfs_buf *bp);
+
+void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
+		struct xfs_buf *bp);
+void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
+		struct xfs_buf *bp);
+void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
+		xfs_fileoff_t offset);
+
+void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
+		struct xfs_buf *bp);
+void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
+		xfs_fileoff_t offset);
+
+void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
+int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
+
+/* Setup functions */
+int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
+			      struct xfs_inode *ip);
+int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
+			       struct xfs_inode *ip);
+int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
+				struct xfs_inode *ip);
+int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc,
+			      struct xfs_inode *ip);
+int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc,
+				  struct xfs_inode *ip);
+int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
+			  struct xfs_inode *ip);
+int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
+			       struct xfs_inode *ip);
+int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc,
+				    struct xfs_inode *ip);
+int xfs_scrub_setup_directory(struct xfs_scrub_context *sc,
+			      struct xfs_inode *ip);
+int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc,
+			  struct xfs_inode *ip);
+int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc,
+			    struct xfs_inode *ip);
+int xfs_scrub_setup_parent(struct xfs_scrub_context *sc,
+			   struct xfs_inode *ip);
+#ifdef CONFIG_XFS_RT
+int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+#else
+static inline int
+xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+{
+	return -ENOENT;
+}
+#endif
+#ifdef CONFIG_XFS_QUOTA
+int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+#else
+static inline int
+xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+{
+	return -ENOENT;
+}
+#endif
+
+void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
+int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+		      struct xfs_scrub_ag *sa);
+int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+			      struct xfs_buf **agi, struct xfs_buf **agf,
+			      struct xfs_buf **agfl);
+void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa);
+int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
+			    struct xfs_scrub_ag *sa);
+int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
+			int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
+				  void *),
+			void *priv);
+
+int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
+			     struct xfs_inode *ip, bool force_log);
+int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
+int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
+				   struct xfs_inode *ip, unsigned int resblks);
+
+#endif	/* __XFS_SCRUB_COMMON_H__ */
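A scrubber's inner loop is expected to poll xfs_scrub_should_terminate() between records so that a fatal signal ends the scan promptly; note that the helper only overwrites *error when it is still zero. A minimal sketch of the intended calling pattern (nrecs and check_one_record() are hypothetical stand-ins, not part of this patch):

	int				error = 0;
	unsigned int			i;

	for (i = 0; i < nrecs; i++) {
		/* check_one_record() is a hypothetical per-record check. */
		error = check_one_record(sc, i);
		if (error)
			break;
		/* A pending fatal signal sets *error to -EAGAIN if it was 0. */
		if (xfs_scrub_should_terminate(sc, &error))
			break;
	}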

+ 591 - 0
fs/xfs/scrub/dabtree.c

@@ -0,0 +1,591 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/dabtree.h"
+
+/* Directory/Attribute Btree */
+
+/*
+ * Check for da btree operation errors.  See the section about handling
+ * operational errors in common.c.
+ */
+bool
+xfs_scrub_da_process_error(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	int				*error)
+{
+	struct xfs_scrub_context	*sc = ds->sc;
+
+	if (*error == 0)
+		return true;
+
+	switch (*error) {
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
+				xfs_dir2_da_to_db(ds->dargs.geo,
+					ds->state->path.blk[level].blkno),
+				*error, __return_address);
+		break;
+	}
+	return false;
+}
+
+/*
+ * Check for da btree corruption.  See the section about handling
+ * operational errors in common.c.
+ */
+void
+xfs_scrub_da_set_corrupt(
+	struct xfs_scrub_da_btree	*ds,
+	int				level)
+{
+	struct xfs_scrub_context	*sc = ds->sc;
+
+	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+
+	trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
+			xfs_dir2_da_to_db(ds->dargs.geo,
+				ds->state->path.blk[level].blkno),
+			__return_address);
+}
+
+/* Find an entry at a certain level in a da btree. */
+STATIC void *
+xfs_scrub_da_btree_entry(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	int				rec)
+{
+	char				*ents;
+	struct xfs_da_state_blk		*blk;
+	void				*baddr;
+
+	/* Dispatch the entry finding function. */
+	blk = &ds->state->path.blk[level];
+	baddr = blk->bp->b_addr;
+	switch (blk->magic) {
+	case XFS_ATTR_LEAF_MAGIC:
+	case XFS_ATTR3_LEAF_MAGIC:
+		ents = (char *)xfs_attr3_leaf_entryp(baddr);
+		return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
+	case XFS_DIR2_LEAFN_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
+		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
+	case XFS_DIR2_LEAF1_MAGIC:
+	case XFS_DIR3_LEAF1_MAGIC:
+		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
+		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
+	case XFS_DA_NODE_MAGIC:
+	case XFS_DA3_NODE_MAGIC:
+		ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
+		return ents + (rec * sizeof(struct xfs_da_node_entry));
+	}
+
+	return NULL;
+}
+
+/* Scrub a da btree hash (key). */
+int
+xfs_scrub_da_btree_hash(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	__be32				*hashp)
+{
+	struct xfs_da_state_blk		*blks;
+	struct xfs_da_node_entry	*entry;
+	xfs_dahash_t			hash;
+	xfs_dahash_t			parent_hash;
+
+	/* Is this hash in order? */
+	hash = be32_to_cpu(*hashp);
+	if (hash < ds->hashes[level])
+		xfs_scrub_da_set_corrupt(ds, level);
+	ds->hashes[level] = hash;
+
+	if (level == 0)
+		return 0;
+
+	/* Is this hash no larger than the parent hash? */
+	blks = ds->state->path.blk;
+	entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
+	parent_hash = be32_to_cpu(entry->hashval);
+	if (parent_hash < hash)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	return 0;
+}
+
+/*
+ * Check a da btree pointer.  Returns true if it's ok to use this
+ * pointer.
+ */
+STATIC bool
+xfs_scrub_da_btree_ptr_ok(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	xfs_dablk_t			blkno)
+{
+	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * The da btree scrubber can handle leaf1 blocks as a degenerate
+ * form of leafn blocks.  Since the regular da code doesn't handle
+ * leaf1, we must multiplex the verifiers.
+ */
+static void
+xfs_scrub_da_btree_read_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_da_blkinfo	*info = bp->b_addr;
+
+	switch (be16_to_cpu(info->magic)) {
+	case XFS_DIR2_LEAF1_MAGIC:
+	case XFS_DIR3_LEAF1_MAGIC:
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+		bp->b_ops->verify_read(bp);
+		return;
+	default:
+		/*
+		 * xfs_da3_node_buf_ops already knows how to handle
+		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
+		 */
+		bp->b_ops = &xfs_da3_node_buf_ops;
+		bp->b_ops->verify_read(bp);
+		return;
+	}
+}
+static void
+xfs_scrub_da_btree_write_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_da_blkinfo	*info = bp->b_addr;
+
+	switch (be16_to_cpu(info->magic)) {
+	case XFS_DIR2_LEAF1_MAGIC:
+	case XFS_DIR3_LEAF1_MAGIC:
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+		bp->b_ops->verify_write(bp);
+		return;
+	default:
+		/*
+		 * xfs_da3_node_buf_ops already knows how to handle
+		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
+		 */
+		bp->b_ops = &xfs_da3_node_buf_ops;
+		bp->b_ops->verify_write(bp);
+		return;
+	}
+}
+
+static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
+	.name = "xfs_scrub_da_btree",
+	.verify_read = xfs_scrub_da_btree_read_verify,
+	.verify_write = xfs_scrub_da_btree_write_verify,
+};
+
+/* Check a block's sibling. */
+STATIC int
+xfs_scrub_da_btree_block_check_sibling(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	int				direction,
+	xfs_dablk_t			sibling)
+{
+	int				retval;
+	int				error;
+
+	memcpy(&ds->state->altpath, &ds->state->path,
+			sizeof(ds->state->altpath));
+
+	/*
+	 * If the pointer is null, we shouldn't be able to move the upper
+	 * level pointer anywhere.
+	 */
+	if (sibling == 0) {
+		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
+				direction, false, &retval);
+		if (error == 0 && retval == 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+		error = 0;
+		goto out;
+	}
+
+	/* Move the alternate cursor one block in the direction given. */
+	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
+			direction, false, &retval);
+	if (!xfs_scrub_da_process_error(ds, level, &error))
+		return error;
+	if (retval) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		return error;
+	}
+
+	/* Compare upper level pointer to sibling pointer. */
+	if (ds->state->altpath.blk[level].blkno != sibling)
+		xfs_scrub_da_set_corrupt(ds, level);
+	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
+out:
+	return error;
+}
+
+/* Check a block's sibling pointers. */
+STATIC int
+xfs_scrub_da_btree_block_check_siblings(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	struct xfs_da_blkinfo		*hdr)
+{
+	xfs_dablk_t			forw;
+	xfs_dablk_t			back;
+	int				error = 0;
+
+	forw = be32_to_cpu(hdr->forw);
+	back = be32_to_cpu(hdr->back);
+
+	/* Top level blocks should not have sibling pointers. */
+	if (level == 0) {
+		if (forw != 0 || back != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+		return 0;
+	}
+
+	/*
+	 * Check back (left) and forw (right) pointers.  These functions
+	 * absorb error codes for us.
+	 */
+	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
+	if (error)
+		goto out;
+	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
+
+out:
+	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
+	return error;
+}
+
+/* Load a dir/attribute block from a btree. */
+STATIC int
+xfs_scrub_da_btree_block(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	xfs_dablk_t			blkno)
+{
+	struct xfs_da_state_blk		*blk;
+	struct xfs_da_intnode		*node;
+	struct xfs_da_node_entry	*btree;
+	struct xfs_da3_blkinfo		*hdr3;
+	struct xfs_da_args		*dargs = &ds->dargs;
+	struct xfs_inode		*ip = ds->dargs.dp;
+	xfs_ino_t			owner;
+	int				*pmaxrecs;
+	struct xfs_da3_icnode_hdr	nodehdr;
+	int				error = 0;
+
+	blk = &ds->state->path.blk[level];
+	ds->state->path.active = level + 1;
+
+	/* Release old block. */
+	if (blk->bp) {
+		xfs_trans_brelse(dargs->trans, blk->bp);
+		blk->bp = NULL;
+	}
+
+	/* Check the pointer. */
+	blk->blkno = blkno;
+	if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
+		goto out_nobuf;
+
+	/* Read the buffer. */
+	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
+			&blk->bp, dargs->whichfork,
+			&xfs_scrub_da_btree_buf_ops);
+	if (!xfs_scrub_da_process_error(ds, level, &error))
+		goto out_nobuf;
+
+	/*
+	 * We didn't find a dir btree root block, which means that
+	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
+	 * to be), so jump out now.
+	 */
+	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
+			blk->bp == NULL)
+		goto out_nobuf;
+
+	/* It's /not/ ok for attr trees not to have a da btree. */
+	if (blk->bp == NULL) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		goto out_nobuf;
+	}
+
+	hdr3 = blk->bp->b_addr;
+	blk->magic = be16_to_cpu(hdr3->hdr.magic);
+	pmaxrecs = &ds->maxrecs[level];
+
+	/* We only started zeroing the header on v5 filesystems. */
+	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
+		xfs_scrub_da_set_corrupt(ds, level);
+
+	/* Check the owner. */
+	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
+		owner = be64_to_cpu(hdr3->owner);
+		if (owner != ip->i_ino)
+			xfs_scrub_da_set_corrupt(ds, level);
+	}
+
+	/* Check the siblings. */
+	error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
+	if (error)
+		goto out;
+
+	/* Interpret the buffer. */
+	switch (blk->magic) {
+	case XFS_ATTR_LEAF_MAGIC:
+	case XFS_ATTR3_LEAF_MAGIC:
+		xfs_trans_buf_set_type(dargs->trans, blk->bp,
+				XFS_BLFT_ATTR_LEAF_BUF);
+		blk->magic = XFS_ATTR_LEAF_MAGIC;
+		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
+		if (ds->tree_level != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+		break;
+	case XFS_DIR2_LEAFN_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+		xfs_trans_buf_set_type(dargs->trans, blk->bp,
+				XFS_BLFT_DIR_LEAFN_BUF);
+		blk->magic = XFS_DIR2_LEAFN_MAGIC;
+		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
+		if (ds->tree_level != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+		break;
+	case XFS_DIR2_LEAF1_MAGIC:
+	case XFS_DIR3_LEAF1_MAGIC:
+		xfs_trans_buf_set_type(dargs->trans, blk->bp,
+				XFS_BLFT_DIR_LEAF1_BUF);
+		blk->magic = XFS_DIR2_LEAF1_MAGIC;
+		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
+		if (ds->tree_level != 0)
+			xfs_scrub_da_set_corrupt(ds, level);
+		break;
+	case XFS_DA_NODE_MAGIC:
+	case XFS_DA3_NODE_MAGIC:
+		xfs_trans_buf_set_type(dargs->trans, blk->bp,
+				XFS_BLFT_DA_NODE_BUF);
+		blk->magic = XFS_DA_NODE_MAGIC;
+		node = blk->bp->b_addr;
+		ip->d_ops->node_hdr_from_disk(&nodehdr, node);
+		btree = ip->d_ops->node_tree_p(node);
+		*pmaxrecs = nodehdr.count;
+		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
+		if (level == 0) {
+			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
+				xfs_scrub_da_set_corrupt(ds, level);
+				goto out_freebp;
+			}
+			ds->tree_level = nodehdr.level;
+		} else {
+			if (ds->tree_level != nodehdr.level) {
+				xfs_scrub_da_set_corrupt(ds, level);
+				goto out_freebp;
+			}
+		}
+
+		/* XXX: Check hdr3.pad32 once we know how to fix it. */
+		break;
+	default:
+		xfs_scrub_da_set_corrupt(ds, level);
+		goto out_freebp;
+	}
+
+out:
+	return error;
+out_freebp:
+	xfs_trans_brelse(dargs->trans, blk->bp);
+	blk->bp = NULL;
+out_nobuf:
+	blk->blkno = 0;
+	return error;
+}
+
+/* Visit all nodes and leaves of a da btree. */
+int
+xfs_scrub_da_btree(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_scrub_da_btree_rec_fn	scrub_fn,
+	void				*private)
+{
+	struct xfs_scrub_da_btree	ds = {};
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_da_state_blk		*blks;
+	struct xfs_da_node_entry	*key;
+	void				*rec;
+	xfs_dablk_t			blkno;
+	int				level;
+	int				error;
+
+	/* Skip short format data structures; no btree to scan. */
+	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return 0;
+
+	/* Set up initial da state. */
+	ds.dargs.dp = sc->ip;
+	ds.dargs.whichfork = whichfork;
+	ds.dargs.trans = sc->tp;
+	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
+	ds.state = xfs_da_state_alloc();
+	ds.state->args = &ds.dargs;
+	ds.state->mp = mp;
+	ds.sc = sc;
+	ds.private = private;
+	if (whichfork == XFS_ATTR_FORK) {
+		ds.dargs.geo = mp->m_attr_geo;
+		ds.lowest = 0;
+		ds.highest = 0;
+	} else {
+		ds.dargs.geo = mp->m_dir_geo;
+		ds.lowest = ds.dargs.geo->leafblk;
+		ds.highest = ds.dargs.geo->freeblk;
+	}
+	blkno = ds.lowest;
+	level = 0;
+
+	/* Find the root of the da tree, if present. */
+	blks = ds.state->path.blk;
+	error = xfs_scrub_da_btree_block(&ds, level, blkno);
+	if (error)
+		goto out_state;
+	/*
+	 * We didn't find a block at ds.lowest, which means that there's
+	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
+	 * so jump out now.
+	 */
+	if (blks[level].bp == NULL)
+		goto out_state;
+
+	blks[level].index = 0;
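+	/*
+	 * Iterative depth-first walk: blks[level].index is the cursor within
+	 * each block on the path.  We descend by reading a child block
+	 * (level++) and pop back towards the root by bumping the parent's
+	 * index once a block's entries are exhausted (level--).
+	 */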
+	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
+		/* Handle leaf block. */
+		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
+			/* End of leaf, pop back towards the root. */
+			if (blks[level].index >= ds.maxrecs[level]) {
+				if (level > 0)
+					blks[level - 1].index++;
+				ds.tree_level++;
+				level--;
+				continue;
+			}
+
+			/* Dispatch record scrubbing. */
+			rec = xfs_scrub_da_btree_entry(&ds, level,
+					blks[level].index);
+			error = scrub_fn(&ds, level, rec);
+			if (error)
+				break;
+			if (xfs_scrub_should_terminate(sc, &error) ||
+			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+				break;
+
+			blks[level].index++;
+			continue;
+		}
+
+		/* End of node, pop back towards the root. */
+		if (blks[level].index >= ds.maxrecs[level]) {
+			if (level > 0)
+				blks[level - 1].index++;
+			ds.tree_level++;
+			level--;
+			continue;
+		}
+
+		/* Hashes in order for scrub? */
+		key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
+		error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
+		if (error)
+			goto out;
+
+		/* Drill another level deeper. */
+		blkno = be32_to_cpu(key->before);
+		level++;
+		ds.tree_level--;
+		error = xfs_scrub_da_btree_block(&ds, level, blkno);
+		if (error)
+			goto out;
+		if (blks[level].bp == NULL)
+			goto out;
+
+		blks[level].index = 0;
+	}
+
+out:
+	/* Release all the buffers we're tracking. */
+	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
+		if (blks[level].bp == NULL)
+			continue;
+		xfs_trans_brelse(sc->tp, blks[level].bp);
+		blks[level].bp = NULL;
+	}
+
+out_state:
+	xfs_da_state_free(ds.state);
+	return error;
+}

+ 59 - 0
fs/xfs/scrub/dabtree.h

@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_DABTREE_H__
+#define __XFS_SCRUB_DABTREE_H__
+
+/* dir/attr btree */
+
+struct xfs_scrub_da_btree {
+	struct xfs_da_args		dargs;
+	xfs_dahash_t			hashes[XFS_DA_NODE_MAXDEPTH];
+	int				maxrecs[XFS_DA_NODE_MAXDEPTH];
+	struct xfs_da_state		*state;
+	struct xfs_scrub_context	*sc;
+	void				*private;
+
+	/*
+	 * Lowest and highest directory block address in which we expect
+	 * to find dir/attr btree node blocks.  For a directory this
+	 * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for
+	 * attributes there is no limit.
+	 */
+	xfs_dablk_t			lowest;
+	xfs_dablk_t			highest;
+
+	int				tree_level;
+};
+
+typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds,
+		int level, void *rec);
+
+/* Check for da btree operation errors. */
+bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error);
+
+/* Check for da btree corruption. */
+void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level);
+
+int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level,
+			    __be32 *hashp);
+int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork,
+		       xfs_scrub_da_btree_rec_fn scrub_fn, void *private);
+
+#endif /* __XFS_SCRUB_DABTREE_H__ */
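The interface is deliberately small: a scrubber hands xfs_scrub_da_btree() a per-record callback, and the generic walker handles tree traversal, sibling checks, and hash ordering. A minimal sketch of a consumer (the callback body here is an illustrative stub; xfs_scrub_dir_rec in dir.c below is the real thing):

	/* Hypothetical record callback: verify hash order and nothing else. */
	STATIC int
	xfs_scrub_example_rec(
		struct xfs_scrub_da_btree	*ds,
		int				level,
		void				*rec)
	{
		struct xfs_dir2_leaf_entry	*ent = rec;

		return xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
	}

	...

	error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_example_rec,
			NULL);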

+ 816 - 0
fs/xfs/scrub/dir.c

@@ -0,0 +1,816 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_itable.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_ialloc.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/dabtree.h"
+
+/* Set us up to scrub directories. */
+int
+xfs_scrub_setup_directory(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+}
+
+/* Directories */
+
+/* Scrub a directory entry. */
+
+struct xfs_scrub_dir_ctx {
+	/* VFS fill-directory iterator */
+	struct dir_context		dir_iter;
+
+	struct xfs_scrub_context	*sc;
+};
+
+/* Check that an inode's mode matches a given DT_ type. */
+STATIC int
+xfs_scrub_dir_check_ftype(
+	struct xfs_scrub_dir_ctx	*sdc,
+	xfs_fileoff_t			offset,
+	xfs_ino_t			inum,
+	int				dtype)
+{
+	struct xfs_mount		*mp = sdc->sc->mp;
+	struct xfs_inode		*ip;
+	int				ino_dtype;
+	int				error = 0;
+
+	if (!xfs_sb_version_hasftype(&mp->m_sb)) {
+		if (dtype != DT_UNKNOWN && dtype != DT_DIR)
+			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+					offset);
+		goto out;
+	}
+
+	/*
+	 * Grab the inode pointed to by the dirent.  We release the
+	 * inode before we cancel the scrub transaction.  Since we
+	 * don't know a priori that releasing the inode won't trigger
+	 * eofblocks cleanup (which allocates what would be a nested
+	 * transaction), we can't use DONTCACHE here because DONTCACHE
+	 * inodes can trigger immediate inactive cleanup of the inode.
+	 */
+	error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
+	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+			&error))
+		goto out;
+
+	/* Convert mode to the DT_* values that dir_emit uses. */
+	ino_dtype = xfs_dir3_get_dtype(mp,
+			xfs_mode_to_ftype(VFS_I(ip)->i_mode));
+	if (ino_dtype != dtype)
+		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+	iput(VFS_I(ip));
+out:
+	return error;
+}
+
+/*
+ * Scrub a single directory entry.
+ *
+ * We use the VFS directory iterator (i.e. readdir) to call this
+ * function for every directory entry in a directory.  Once we're here,
+ * we check the inode number to make sure it's sane, then we check that
+ * we can look up this filename.  Finally, we check the ftype.
+ */
+STATIC int
+xfs_scrub_dir_actor(
+	struct dir_context		*dir_iter,
+	const char			*name,
+	int				namelen,
+	loff_t				pos,
+	u64				ino,
+	unsigned			type)
+{
+	struct xfs_mount		*mp;
+	struct xfs_inode		*ip;
+	struct xfs_scrub_dir_ctx	*sdc;
+	struct xfs_name			xname;
+	xfs_ino_t			lookup_ino;
+	xfs_dablk_t			offset;
+	int				error = 0;
+
+	sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter);
+	ip = sdc->sc->ip;
+	mp = ip->i_mount;
+	offset = xfs_dir2_db_to_da(mp->m_dir_geo,
+			xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));
+
+	/* Does this inode number make sense? */
+	if (!xfs_verify_dir_ino(mp, ino)) {
+		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+		goto out;
+	}
+
+	if (!strncmp(".", name, namelen)) {
+		/* If this is "." then check that the inum matches the dir. */
+		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
+			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+					offset);
+		if (ino != ip->i_ino)
+			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+					offset);
+	} else if (!strncmp("..", name, namelen)) {
+		/*
+		 * If this is ".." in the root inode, check that the inum
+		 * matches this dir.
+		 */
+		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
+			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+					offset);
+		if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
+			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+					offset);
+	}
+
+	/* Verify that we can look up this name by hash. */
+	xname.name = name;
+	xname.len = namelen;
+	xname.type = XFS_DIR3_FT_UNKNOWN;
+
+	error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
+	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+			&error))
+		goto fail_xref;
+	if (lookup_ino != ino) {
+		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+		goto out;
+	}
+
+	/* Verify the file type.  This function absorbs error codes. */
+	error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type);
+out:
+	return error;
+fail_xref:
+	return error;
+}
+
+/* Scrub a directory btree record. */
+STATIC int
+xfs_scrub_dir_rec(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	void				*rec)
+{
+	struct xfs_mount		*mp = ds->state->mp;
+	struct xfs_dir2_leaf_entry	*ent = rec;
+	struct xfs_inode		*dp = ds->dargs.dp;
+	struct xfs_dir2_data_entry	*dent;
+	struct xfs_buf			*bp;
+	xfs_ino_t			ino;
+	xfs_dablk_t			rec_bno;
+	xfs_dir2_db_t			db;
+	xfs_dir2_data_aoff_t		off;
+	xfs_dir2_dataptr_t		ptr;
+	xfs_dahash_t			calc_hash;
+	xfs_dahash_t			hash;
+	unsigned int			tag;
+	int				error;
+
+	/* Check the hash of the entry. */
+	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+	if (error)
+		goto out;
+
+	/* Valid hash pointer? */
+	ptr = be32_to_cpu(ent->address);
+	if (ptr == 0)
+		return 0;
+
+	/* Find the directory entry's location. */
+	db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr);
+	off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr);
+	rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db);
+
+	if (rec_bno >= mp->m_dir_geo->leafblk) {
+		xfs_scrub_da_set_corrupt(ds, level);
+		goto out;
+	}
+	error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp);
+	if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
+			&error))
+		goto out;
+	if (!bp) {
+		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		goto out;
+	}
+
+	/* Retrieve the entry, sanity check it, and compare hashes. */
+	dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+	ino = be64_to_cpu(dent->inumber);
+	hash = be32_to_cpu(ent->hashval);
+	tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
+	if (!xfs_verify_dir_ino(mp, ino) || tag != off)
+		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+	if (dent->namelen == 0) {
+		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		goto out_relse;
+	}
+	calc_hash = xfs_da_hashname(dent->name, dent->namelen);
+	if (calc_hash != hash)
+		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+
+out_relse:
+	xfs_trans_brelse(ds->dargs.trans, bp);
+out:
+	return error;
+}
+
+/*
+ * Is this unused entry either in the bestfree or smaller than all of
+ * them?  We've already checked that the bestfrees are sorted longest to
+ * shortest, and that there aren't any bogus entries.
+ */
+STATIC void
+xfs_scrub_directory_check_free_entry(
+	struct xfs_scrub_context	*sc,
+	xfs_dablk_t			lblk,
+	struct xfs_dir2_data_free	*bf,
+	struct xfs_dir2_data_unused	*dup)
+{
+	struct xfs_dir2_data_free	*dfp;
+	unsigned int			dup_length;
+
+	dup_length = be16_to_cpu(dup->length);
+
+	/* Unused entry is shorter than any of the bestfrees */
+	if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
+		return;
+
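+	/*
+	 * Matching by length is sufficient here; the caller has already
+	 * verified that each bestfree offset points at a real free entry
+	 * of the recorded length.
+	 */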
+	for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
+		if (dup_length == be16_to_cpu(dfp->length))
+			return;
+
+	/* Unused entry should be in the bestfrees but wasn't found. */
+	xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+}
+
+/* Check free space info in a directory data block. */
+STATIC int
+xfs_scrub_directory_data_bestfree(
+	struct xfs_scrub_context	*sc,
+	xfs_dablk_t			lblk,
+	bool				is_block)
+{
+	struct xfs_dir2_data_unused	*dup;
+	struct xfs_dir2_data_free	*dfp;
+	struct xfs_buf			*bp;
+	struct xfs_dir2_data_free	*bf;
+	struct xfs_mount		*mp = sc->mp;
+	const struct xfs_dir_ops	*d_ops;
+	char				*ptr;
+	char				*endptr;
+	u16				tag;
+	unsigned int			nr_bestfrees = 0;
+	unsigned int			nr_frees = 0;
+	unsigned int			smallest_bestfree;
+	int				newlen;
+	int				offset;
+	int				error;
+
+	d_ops = sc->ip->d_ops;
+
+	if (is_block) {
+		/* dir block format */
+		if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
+	} else {
+		/* dir data format */
+		error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp);
+	}
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+		goto out;
+
+	/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
+
+	/* Do the bestfrees correspond to actual free space? */
+	bf = d_ops->data_bestfree_p(bp->b_addr);
+	smallest_bestfree = UINT_MAX;
+	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
+		offset = be16_to_cpu(dfp->offset);
+		if (offset == 0)
+			continue;
+		if (offset >= mp->m_dir_geo->blksize) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out_buf;
+		}
+		dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset);
+		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
+
+		/* bestfree doesn't match the entry it points at? */
+		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
+		    be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
+		    tag != ((char *)dup - (char *)bp->b_addr)) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out_buf;
+		}
+
+		/* bestfree records should be ordered largest to smallest */
+		if (smallest_bestfree < be16_to_cpu(dfp->length)) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out_buf;
+		}
+
+		smallest_bestfree = be16_to_cpu(dfp->length);
+		nr_bestfrees++;
+	}
+
+	/* Make sure the bestfrees are actually the best free spaces. */
+	ptr = (char *)d_ops->data_entry_p(bp->b_addr);
+	if (is_block) {
+		struct xfs_dir2_block_tail	*btp;
+
+		btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
+		endptr = (char *)xfs_dir2_block_leaf_p(btp);
+	} else {
+		endptr = (char *)bp->b_addr + BBTOB(bp->b_length);
+	}
+
+	/* Iterate the entries, stopping when we hit or go past the end. */
+	while (ptr < endptr) {
+		dup = (struct xfs_dir2_data_unused *)ptr;
+		/* Skip real entries */
+		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
+			struct xfs_dir2_data_entry	*dep;
+
+			dep = (struct xfs_dir2_data_entry *)ptr;
+			newlen = d_ops->data_entsize(dep->namelen);
+			if (newlen <= 0) {
+				xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+						lblk);
+				goto out_buf;
+			}
+			ptr += newlen;
+			continue;
+		}
+
+		/* Spot check this free entry */
+		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
+		if (tag != ((char *)dup - (char *)bp->b_addr))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+
+		/*
+		 * Either this entry is a bestfree or it's smaller than
+		 * any of the bestfrees.
+		 */
+		xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup);
+
+		/* Move on. */
+		newlen = be16_to_cpu(dup->length);
+		if (newlen <= 0) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out_buf;
+		}
+		ptr += newlen;
+		if (ptr <= endptr)
+			nr_frees++;
+	}
+
+	/* We're required to fill all the space. */
+	if (ptr != endptr)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+
+	/* Did we see at least as many free slots as there are bestfrees? */
+	if (nr_frees < nr_bestfrees)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+out_buf:
+	xfs_trans_brelse(sc->tp, bp);
+out:
+	return error;
+}
+
+/*
+ * Does the free space length in the free space index block ($len) match
+ * the longest length in the directory data block's bestfree array?
+ * Assume that we've already checked that the data block's bestfree
+ * array is in order.
+ */
+STATIC void
+xfs_scrub_directory_check_freesp(
+	struct xfs_scrub_context	*sc,
+	xfs_dablk_t			lblk,
+	struct xfs_buf			*dbp,
+	unsigned int			len)
+{
+	struct xfs_dir2_data_free	*dfp;
+
+	dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr);
+
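+	/* bestfree[0] holds the longest free extent in the data block. */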
+	if (len != be16_to_cpu(dfp->length))
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+
+	if (len > 0 && be16_to_cpu(dfp->offset) == 0)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+}
+
+/* Check free space info in a directory leaf1 block. */
+STATIC int
+xfs_scrub_directory_leaf1_bestfree(
+	struct xfs_scrub_context	*sc,
+	struct xfs_da_args		*args,
+	xfs_dablk_t			lblk)
+{
+	struct xfs_dir3_icleaf_hdr	leafhdr;
+	struct xfs_dir2_leaf_entry	*ents;
+	struct xfs_dir2_leaf_tail	*ltp;
+	struct xfs_dir2_leaf		*leaf;
+	struct xfs_buf			*dbp;
+	struct xfs_buf			*bp;
+	const struct xfs_dir_ops	*d_ops = sc->ip->d_ops;
+	struct xfs_da_geometry		*geo = sc->mp->m_dir_geo;
+	__be16				*bestp;
+	__u16				best;
+	__u32				hash;
+	__u32				lasthash = 0;
+	__u32				bestcount;
+	unsigned int			stale = 0;
+	int				i;
+	int				error;
+
+	/* Read the free space block. */
+	error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+		goto out;
+
+	leaf = bp->b_addr;
+	d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+	ents = d_ops->leaf_ents_p(leaf);
+	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
+	bestcount = be32_to_cpu(ltp->bestcount);
+	bestp = xfs_dir2_leaf_bests_p(ltp);
+
+	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
+		struct xfs_dir3_leaf_hdr	*hdr3 = bp->b_addr;
+
+		if (hdr3->pad != cpu_to_be32(0))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+	}
+
+	/*
+	 * There should be as many bestfree slots as there are dir data
+	 * blocks that can fit under i_size.
+	 */
+	if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		goto out;
+	}
+
+	/* Is the leaf count even remotely sane? */
+	if (leafhdr.count > d_ops->leaf_max_ents(geo)) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		goto out;
+	}
+
+	/* Leaves and bests don't overlap in leaf format. */
+	if ((char *)&ents[leafhdr.count] > (char *)bestp) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		goto out;
+	}
+
+	/* Check hash value order, count stale entries.  */
+	for (i = 0; i < leafhdr.count; i++) {
+		hash = be32_to_cpu(ents[i].hashval);
+		if (i > 0 && lasthash > hash)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		lasthash = hash;
+		if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+			stale++;
+	}
+	if (leafhdr.stale != stale)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+
+	/* Check all the bestfree entries. */
+	for (i = 0; i < bestcount; i++, bestp++) {
+		best = be16_to_cpu(*bestp);
+		if (best == NULLDATAOFF)
+			continue;
+		error = xfs_dir3_data_read(sc->tp, sc->ip,
+				i * args->geo->fsbcount, -1, &dbp);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
+				&error))
+			continue;
+		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
+		xfs_trans_brelse(sc->tp, dbp);
+	}
+out:
+	return error;
+}
+
+/* Check free space info in a directory freespace block. */
+STATIC int
+xfs_scrub_directory_free_bestfree(
+	struct xfs_scrub_context	*sc,
+	struct xfs_da_args		*args,
+	xfs_dablk_t			lblk)
+{
+	struct xfs_dir3_icfree_hdr	freehdr;
+	struct xfs_buf			*dbp;
+	struct xfs_buf			*bp;
+	__be16				*bestp;
+	__u16				best;
+	unsigned int			stale = 0;
+	int				i;
+	int				error;
+
+	/* Read the free space block */
+	error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+		goto out;
+
+	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
+		struct xfs_dir3_free_hdr	*hdr3 = bp->b_addr;
+
+		if (hdr3->pad != cpu_to_be32(0))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+	}
+
+	/* Check all the entries. */
+	sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr);
+	bestp = sc->ip->d_ops->free_bests_p(bp->b_addr);
+	for (i = 0; i < freehdr.nvalid; i++, bestp++) {
+		best = be16_to_cpu(*bestp);
+		if (best == NULLDATAOFF) {
+			stale++;
+			continue;
+		}
+		error = xfs_dir3_data_read(sc->tp, sc->ip,
+				(freehdr.firstdb + i) * args->geo->fsbcount,
+				-1, &dbp);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
+				&error))
+			continue;
+		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
+		xfs_trans_brelse(sc->tp, dbp);
+	}
+
+	if (freehdr.nused + stale != freehdr.nvalid)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+out:
+	return error;
+}
+
+/* Check free space information in directories. */
+STATIC int
+xfs_scrub_directory_blocks(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_bmbt_irec		got;
+	struct xfs_da_args		args;
+	struct xfs_ifork		*ifp;
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fileoff_t			leaf_lblk;
+	xfs_fileoff_t			free_lblk;
+	xfs_fileoff_t			lblk;
+	struct xfs_iext_cursor		icur;
+	xfs_dablk_t			dabno;
+	bool				found;
+	int				is_block = 0;
+	int				error;
+
+	/* Ignore local format directories. */
+	if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
+	    sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
+		return 0;
+
+	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+	lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
+	leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
+	free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
+
+	/* Is this a block dir? */
+	args.dp = sc->ip;
+	args.geo = mp->m_dir_geo;
+	args.trans = sc->tp;
+	error = xfs_dir2_isblock(&args, &is_block);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+		goto out;
+
+	/* Iterate all the data extents in the directory... */
+	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
+	while (found) {
+		/* Block directories only have a single block at offset 0. */
+		if (is_block &&
+		    (got.br_startoff > 0 ||
+		     got.br_blockcount != args.geo->fsbcount)) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+					got.br_startoff);
+			break;
+		}
+
+		/* No more data blocks... */
+		if (got.br_startoff >= leaf_lblk)
+			break;
+
+		/*
+		 * Check each data block's bestfree data.
+		 *
+		 * Iterate all the fsbcount-aligned block offsets in
+		 * this directory.  The directory block reading code is
+		 * smart enough to do its own bmap lookups to handle
+		 * discontiguous directory blocks.  When we're done
+		 * with the extent record, re-query the bmap at the
+		 * next fsbcount-aligned offset to avoid redundant
+		 * block checks.
+		 */
+		for (lblk = roundup((xfs_dablk_t)got.br_startoff,
+				args.geo->fsbcount);
+		     lblk < got.br_startoff + got.br_blockcount;
+		     lblk += args.geo->fsbcount) {
+			error = xfs_scrub_directory_data_bestfree(sc, lblk,
+					is_block);
+			if (error)
+				goto out;
+		}
+		dabno = got.br_startoff + got.br_blockcount;
+		lblk = roundup(dabno, args.geo->fsbcount);
+		found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
+	}
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Look for a leaf1 block, which has free info. */
+	if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
+	    got.br_startoff == leaf_lblk &&
+	    got.br_blockcount == args.geo->fsbcount &&
+	    !xfs_iext_next_extent(ifp, &icur, &got)) {
+		if (is_block) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out;
+		}
+		error = xfs_scrub_directory_leaf1_bestfree(sc, &args,
+				leaf_lblk);
+		if (error)
+			goto out;
+	}
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Scan for free blocks */
+	lblk = free_lblk;
+	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
+	while (found) {
+		/*
+		 * Dirs can't have blocks mapped above 2^32.
+		 * Single-block dirs shouldn't even be here.
+		 */
+		lblk = got.br_startoff;
+		if (lblk & ~0xFFFFFFFFULL) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out;
+		}
+		if (is_block) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out;
+		}
+
+		/*
+		 * Check each dir free block's bestfree data.
+		 *
+		 * Iterate all the fsbcount-aligned block offsets in
+		 * this directory.  The directory block reading code is
+		 * smart enough to do its own bmap lookups to handle
+		 * discontiguous directory blocks.  When we're done
+		 * with the extent record, re-query the bmap at the
+		 * next fsbcount-aligned offset to avoid redundant
+		 * block checks.
+		 */
+		for (lblk = roundup((xfs_dablk_t)got.br_startoff,
+				args.geo->fsbcount);
+		     lblk < got.br_startoff + got.br_blockcount;
+		     lblk += args.geo->fsbcount) {
+			error = xfs_scrub_directory_free_bestfree(sc, &args,
+					lblk);
+			if (error)
+				goto out;
+		}
+		dabno = got.br_startoff + got.br_blockcount;
+		lblk = roundup(dabno, args.geo->fsbcount);
+		found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
+	}
+out:
+	return error;
+}
+
+/* Scrub a whole directory. */
+int
+xfs_scrub_directory(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_scrub_dir_ctx	sdc = {
+		.dir_iter.actor = xfs_scrub_dir_actor,
+		.dir_iter.pos = 0,
+		.sc = sc,
+	};
+	size_t				bufsize;
+	loff_t				oldpos;
+	int				error = 0;
+
+	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
+		return -ENOENT;
+
+	/* Plausible size? */
+	if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		goto out;
+	}
+
+	/* Check directory tree structure */
+	error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL);
+	if (error)
+		return error;
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return error;
+
+	/* Check the freespace. */
+	error = xfs_scrub_directory_blocks(sc);
+	if (error)
+		return error;
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return error;
+
+	/*
+	 * Check that every dirent we see can also be looked up by hash.
+	 * Userspace usually asks for a 32k buffer, so we will too.
+	 */
+	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
+			sc->ip->i_d.di_size);
+
+	/*
+	 * Look up every name in this directory by hash.
+	 *
+	 * Use the xfs_readdir function to call xfs_scrub_dir_actor on
+	 * every directory entry in this directory.  In _actor, we check
+	 * the name, inode number, and ftype (if applicable) of the
+	 * entry.  xfs_readdir uses the VFS filldir functions to provide
+	 * iteration context.
+	 *
+	 * The VFS grabs a read or write lock via i_rwsem before it reads
+	 * or writes to a directory.  If we've gotten this far we've
+	 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
+	 * getting a write lock on i_rwsem.  Therefore, it is safe for us
+	 * to drop the ILOCK here in order to reuse the _readdir and
+	 * _dir_lookup routines, which do their own ILOCK locking.
+	 */
+	oldpos = 0;
+	sc->ilock_flags &= ~XFS_ILOCK_EXCL;
+	xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
+	while (true) {
+		error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+				&error))
+			goto out;
+		if (oldpos == sdc.dir_iter.pos)
+			break;
+		oldpos = sdc.dir_iter.pos;
+	}
+
+out:
+	return error;
+}

+ 337 - 0
fs/xfs/scrub/ialloc.c

@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_icache.h"
+#include "xfs_rmap.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/*
+ * Set us up to scrub inode btrees.
+ * If we detect a discrepancy between the inobt and the inode,
+ * try again after forcing logged inode cores out to disk.
+ */
+int
+xfs_scrub_setup_ag_iallocbt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder);
+}
+
+/* Inode btree scrubber. */
+
+/* Is this chunk worth checking? */
+STATIC bool
+xfs_scrub_iallocbt_chunk(
+	struct xfs_scrub_btree		*bs,
+	struct xfs_inobt_rec_incore	*irec,
+	xfs_agino_t			agino,
+	xfs_extlen_t			len)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
+	xfs_agblock_t			bno;
+
+	bno = XFS_AGINO_TO_AGBNO(mp, agino);
+	if (bno + len <= bno ||
+	    !xfs_verify_agbno(mp, agno, bno) ||
+	    !xfs_verify_agbno(mp, agno, bno + len - 1))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	return true;
+}
+
+/* Count the number of free inodes. */
+static unsigned int
+xfs_scrub_iallocbt_freecount(
+	xfs_inofree_t			freemask)
+{
+	BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64));
+	return hweight64(freemask);
+}
+
+/* Check a particular inode with ir_free. */
+STATIC int
+xfs_scrub_iallocbt_check_cluster_freemask(
+	struct xfs_scrub_btree		*bs,
+	xfs_ino_t			fsino,
+	xfs_agino_t			chunkino,
+	xfs_agino_t			clusterino,
+	struct xfs_inobt_rec_incore	*irec,
+	struct xfs_buf			*bp)
+{
+	struct xfs_dinode		*dip;
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	bool				inode_is_free = false;
+	bool				freemask_ok;
+	bool				inuse;
+	int				error = 0;
+
+	if (xfs_scrub_should_terminate(bs->sc, &error))
+		return error;
+
+	dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
+	    (dip->di_version >= 3 &&
+	     be64_to_cpu(dip->di_ino) != fsino + clusterino)) {
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		goto out;
+	}
+
+	if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino))
+		inode_is_free = true;
+	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
+			fsino + clusterino, &inuse);
+	if (error == -ENODATA) {
+		/* Not cached, just read the disk buffer */
+		freemask_ok = inode_is_free ^ !!(dip->di_mode);
+		if (!bs->sc->try_harder && !freemask_ok)
+			return -EDEADLOCK;
+	} else if (error < 0) {
+		/*
+		 * Inode is only half assembled, or there was an IO error,
+		 * or the verifier failed, so don't bother trying to check.
+		 * The inode scrubber can deal with this.
+		 */
+		goto out;
+	} else {
+		/* Inode is all there. */
+		freemask_ok = inode_is_free ^ inuse;
+	}
+	if (!freemask_ok)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+out:
+	return 0;
+}
+
+/* Make sure the free mask is consistent with what the inodes think. */
+STATIC int
+xfs_scrub_iallocbt_check_freemask(
+	struct xfs_scrub_btree		*bs,
+	struct xfs_inobt_rec_incore	*irec)
+{
+	struct xfs_owner_info		oinfo;
+	struct xfs_imap			imap;
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_dinode		*dip;
+	struct xfs_buf			*bp;
+	xfs_ino_t			fsino;
+	xfs_agino_t			nr_inodes;
+	xfs_agino_t			agino;
+	xfs_agino_t			chunkino;
+	xfs_agino_t			clusterino;
+	xfs_agblock_t			agbno;
+	int				blks_per_cluster;
+	uint16_t			holemask;
+	uint16_t			ir_holemask;
+	int				error = 0;
+
+	/* Make sure the freemask matches the inode records. */
+	blks_per_cluster = xfs_icluster_size_fsb(mp);
+	nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+
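+	/* Walk the inode chunk one cluster buffer at a time. */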
+	for (agino = irec->ir_startino;
+	     agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
+	     agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
+		fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
+		chunkino = agino - irec->ir_startino;
+		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+		/* Compute the holemask for this cluster. */
+		for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
+		     clusterino += XFS_INODES_PER_HOLEMASK_BIT)
+			holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
+					XFS_INODES_PER_HOLEMASK_BIT);
+
+		/* The whole cluster must be a hole or not a hole. */
+		ir_holemask = (irec->ir_holemask & holemask);
+		if (ir_holemask != holemask && ir_holemask != 0) {
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			continue;
+		}
+
+		/* If any part of this is a hole, skip it. */
+		if (ir_holemask)
+			continue;
+
+		/* Grab the inode cluster buffer. */
+		imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
+				agbno);
+		imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+		imap.im_boffset = 0;
+
+		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
+				&dip, &bp, 0, 0);
+		if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
+			continue;
+
+		/* Which inodes are free? */
+		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
+			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
+					fsino, chunkino, clusterino, irec, bp);
+			if (error) {
+				xfs_trans_brelse(bs->cur->bc_tp, bp);
+				return error;
+			}
+		}
+
+		xfs_trans_brelse(bs->cur->bc_tp, bp);
+	}
+
+	return error;
+}
+
+/* Scrub an inobt/finobt record. */
+STATIC int
+xfs_scrub_iallocbt_rec(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_inobt_rec_incore	irec;
+	uint64_t			holes;
+	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
+	xfs_agino_t			agino;
+	xfs_agblock_t			agbno;
+	xfs_extlen_t			len;
+	int				holecount;
+	int				i;
+	int				error = 0;
+	unsigned int			real_freecount;
+	uint16_t			holemask;
+
+	xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+	if (irec.ir_count > XFS_INODES_PER_CHUNK ||
+	    irec.ir_freecount > XFS_INODES_PER_CHUNK)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
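+	/*
+	 * ir_freecount only counts free inodes among the ir_count that were
+	 * physically allocated, but inodes missing from a sparse chunk are
+	 * also marked free in ir_free; add them back before comparing with
+	 * the popcount of the free mask.
+	 */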
+	real_freecount = irec.ir_freecount +
+			(XFS_INODES_PER_CHUNK - irec.ir_count);
+	if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	agino = irec.ir_startino;
+	/* Record has to be properly aligned within the AG. */
+	if (!xfs_verify_agino(mp, agno, agino) ||
+	    !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) {
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		goto out;
+	}
+
+	/* Make sure this record is aligned to cluster and inoalignment size. */
+	agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
+	if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) ||
+	    (agbno & (xfs_icluster_size_fsb(mp) - 1)))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	/* Handle non-sparse inodes */
+	if (!xfs_inobt_issparse(irec.ir_holemask)) {
+		len = XFS_B_TO_FSB(mp,
+				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
+		if (irec.ir_count != XFS_INODES_PER_CHUNK)
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+		if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
+			goto out;
+		goto check_freemask;
+	}
+
+	/* Check each chunk of a sparse inode cluster. */
+	holemask = irec.ir_holemask;
+	holecount = 0;
+	len = XFS_B_TO_FSB(mp,
+			XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
+	holes = ~xfs_inobt_irec_to_allocmask(&irec);
+	if ((holes & irec.ir_free) != holes ||
+	    irec.ir_freecount > irec.ir_count)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
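+	/*
+	 * Each of the XFS_INOBT_HOLEMASK_BITS holemask bits covers
+	 * XFS_INODES_PER_HOLEMASK_BIT consecutive inodes; a set bit means
+	 * those inodes were never physically allocated.
+	 */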
+	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
+		if (holemask & 1)
+			holecount += XFS_INODES_PER_HOLEMASK_BIT;
+		else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
+			break;
+		holemask >>= 1;
+		agino += XFS_INODES_PER_HOLEMASK_BIT;
+	}
+
+	if (holecount > XFS_INODES_PER_CHUNK ||
+	    holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+check_freemask:
+	error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
+
+out:
+	return error;
+}
+
+/* Scrub the inode btrees for some AG. */
+STATIC int
+xfs_scrub_iallocbt(
+	struct xfs_scrub_context	*sc,
+	xfs_btnum_t			which)
+{
+	struct xfs_btree_cur		*cur;
+	struct xfs_owner_info		oinfo;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
+	return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL);
+}
+
+int
+xfs_scrub_inobt(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
+}
+
+int
+xfs_scrub_finobt(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
+}

+ 611 - 0
fs/xfs/scrub/inode.c

@@ -0,0 +1,611 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_inode_buf.h"
+#include "xfs_inode_fork.h"
+#include "xfs_ialloc.h"
+#include "xfs_da_format.h"
+#include "xfs_reflink.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/*
+ * Grab total control of the inode metadata.  It doesn't matter here if
+ * the file data is still changing; exclusive access to the metadata is
+ * the goal.
+ */
+int
+xfs_scrub_setup_inode(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	/*
+	 * Try to get the inode.  If the verifiers fail, we try again
+	 * in raw mode.
+	 */
+	error = xfs_scrub_get_inode(sc, ip);
+	switch (error) {
+	case 0:
+		break;
+	case -EFSCORRUPTED:
+	case -EFSBADCRC:
+		return 0;
+	default:
+		return error;
+	}
+
+	/* Got the inode, lock it and we're ready to go. */
+	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	if (error)
+		goto out;
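+	/*
+	 * Take the ILOCK only after the empty transaction is allocated,
+	 * matching the usual xfs transaction/ILOCK ordering.
+	 */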
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+out:
+	/* scrub teardown will unlock and release the inode for us */
+	return error;
+}
+
+/* Inode core */
+
+/*
+ * Validate di_extsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_extsize().
+ * These functions must be kept in sync with each other.
+ */
+STATIC void
+xfs_scrub_inode_extsize(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	struct xfs_dinode		*dip,
+	xfs_ino_t			ino,
+	uint16_t			mode,
+	uint16_t			flags)
+{
+	struct xfs_mount		*mp = sc->mp;
+	bool				rt_flag;
+	bool				hint_flag;
+	bool				inherit_flag;
+	uint32_t			extsize;
+	uint32_t			extsize_bytes;
+	uint32_t			blocksize_bytes;
+
+	rt_flag = (flags & XFS_DIFLAG_REALTIME);
+	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
+	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
+	extsize = be32_to_cpu(dip->di_extsize);
+	extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
+
+	if (rt_flag)
+		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+	else
+		blocksize_bytes = mp->m_sb.sb_blocksize;
+
+	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
+		goto bad;
+
+	if (hint_flag && !S_ISREG(mode))
+		goto bad;
+
+	if (inherit_flag && !S_ISDIR(mode))
+		goto bad;
+
+	if ((hint_flag || inherit_flag) && extsize == 0)
+		goto bad;
+
+	if (!(hint_flag || inherit_flag) && extsize != 0)
+		goto bad;
+
+	if (extsize_bytes % blocksize_bytes)
+		goto bad;
+
+	if (extsize > MAXEXTLEN)
+		goto bad;
+
+	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
+		goto bad;
+
+	return;
+bad:
+	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
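
/*
 * A minimal standalone restatement of the extsize rules checked above,
 * with hypothetical names (the realtime and AG-size clauses are
 * omitted for brevity).  Only the logic mirrors the kernel checks.
 */
#include <stdbool.h>
#include <stdint.h>

static bool extsize_hint_ok(bool is_reg, bool is_dir, bool hint,
			    bool inherit, uint32_t extsize,
			    uint32_t extsize_bytes,
			    uint32_t blocksize_bytes, uint32_t max_extlen)
{
	if ((hint || inherit) && !(is_dir || is_reg))
		return false;		/* hints only on files and dirs */
	if (hint && !is_reg)
		return false;		/* EXTSIZE is file-only */
	if (inherit && !is_dir)
		return false;		/* EXTSZINHERIT is dir-only */
	if ((hint || inherit) != (extsize != 0))
		return false;		/* flag and value must agree */
	if (extsize_bytes % blocksize_bytes)
		return false;		/* hint must be block-aligned */
	return extsize <= max_extlen;	/* MAXEXTLEN cap */
}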
+
+/*
+ * Validate di_cowextsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
+ * These functions must be kept in sync with each other.
+ */
+STATIC void
+xfs_scrub_inode_cowextsize(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	struct xfs_dinode		*dip,
+	xfs_ino_t			ino,
+	uint16_t			mode,
+	uint16_t			flags,
+	uint64_t			flags2)
+{
+	struct xfs_mount		*mp = sc->mp;
+	bool				rt_flag;
+	bool				hint_flag;
+	uint32_t			extsize;
+	uint32_t			extsize_bytes;
+
+	rt_flag = (flags & XFS_DIFLAG_REALTIME);
+	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
+	extsize = be32_to_cpu(dip->di_cowextsize);
+	extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
+
+	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
+		goto bad;
+
+	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
+		goto bad;
+
+	if (hint_flag && extsize == 0)
+		goto bad;
+
+	if (!hint_flag && extsize != 0)
+		goto bad;
+
+	if (hint_flag && rt_flag)
+		goto bad;
+
+	if (extsize_bytes % mp->m_sb.sb_blocksize)
+		goto bad;
+
+	if (extsize > MAXEXTLEN)
+		goto bad;
+
+	if (extsize > mp->m_sb.sb_agblocks / 2)
+		goto bad;
+
+	return;
+bad:
+	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
+/* Make sure the di_flags make sense for the inode. */
+STATIC void
+xfs_scrub_inode_flags(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	struct xfs_dinode		*dip,
+	xfs_ino_t			ino,
+	uint16_t			mode,
+	uint16_t			flags)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (flags & ~XFS_DIFLAG_ANY)
+		goto bad;
+
+	/* rt flags require rt device */
+	if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
+	    !mp->m_rtdev_targp)
+		goto bad;
+
+	/* new rt bitmap flag only valid for rbmino */
+	if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
+		goto bad;
+
+	/* directory-only flags */
+	if ((flags & (XFS_DIFLAG_RTINHERIT |
+		     XFS_DIFLAG_EXTSZINHERIT |
+		     XFS_DIFLAG_PROJINHERIT |
+		     XFS_DIFLAG_NOSYMLINKS)) &&
+	    !S_ISDIR(mode))
+		goto bad;
+
+	/* file-only flags */
+	if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
+	    !S_ISREG(mode))
+		goto bad;
+
+	/* filestreams and rt make no sense */
+	if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
+		goto bad;
+
+	return;
+bad:
+	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
+/* Make sure the di_flags2 make sense for the inode. */
+STATIC void
+xfs_scrub_inode_flags2(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	struct xfs_dinode		*dip,
+	xfs_ino_t			ino,
+	uint16_t			mode,
+	uint16_t			flags,
+	uint64_t			flags2)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (flags2 & ~XFS_DIFLAG2_ANY)
+		goto bad;
+
+	/* reflink flag requires reflink feature */
+	if ((flags2 & XFS_DIFLAG2_REFLINK) &&
+	    !xfs_sb_version_hasreflink(&mp->m_sb))
+		goto bad;
+
+	/* cowextsize flag is checked w.r.t. mode separately */
+
+	/* file/dir-only flags */
+	if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
+		goto bad;
+
+	/* file-only flags */
+	if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
+		goto bad;
+
+	/* realtime and reflink make no sense, currently */
+	if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
+		goto bad;
+
+	/* dax and reflink make no sense, currently */
+	if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
+		goto bad;
+
+	return;
+bad:
+	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
+/* Scrub all the ondisk inode fields. */
+STATIC void
+xfs_scrub_dinode(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	struct xfs_dinode		*dip,
+	xfs_ino_t			ino)
+{
+	struct xfs_mount		*mp = sc->mp;
+	size_t				fork_recs;
+	unsigned long long		isize;
+	uint64_t			flags2;
+	uint32_t			nextents;
+	uint16_t			flags;
+	uint16_t			mode;
+
+	flags = be16_to_cpu(dip->di_flags);
+	if (dip->di_version >= 3)
+		flags2 = be64_to_cpu(dip->di_flags2);
+	else
+		flags2 = 0;
+
+	/* di_mode */
+	mode = be16_to_cpu(dip->di_mode);
+	if (mode & ~(S_IALLUGO | S_IFMT))
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* v1/v2 fields */
+	switch (dip->di_version) {
+	case 1:
+		/*
+		 * We autoconvert v1 inodes into v2 inodes on writeout,
+		 * so just mark this inode for preening.
+		 */
+		xfs_scrub_ino_set_preen(sc, ino, bp);
+		break;
+	case 2:
+	case 3:
+		if (dip->di_onlink != 0)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+		if (dip->di_mode == 0 && sc->ip)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+		if (dip->di_projid_hi != 0 &&
+		    !xfs_sb_version_hasprojid32bit(&mp->m_sb))
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	default:
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		return;
+	}
+
+	/*
+	 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
+	 * userspace could have created that.
+	 */
+	if (dip->di_uid == cpu_to_be32(-1U) ||
+	    dip->di_gid == cpu_to_be32(-1U))
+		xfs_scrub_ino_set_warning(sc, ino, bp);
+
+	/* di_format */
+	switch (dip->di_format) {
+	case XFS_DINODE_FMT_DEV:
+		if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
+		    !S_ISFIFO(mode) && !S_ISSOCK(mode))
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		if (!S_ISDIR(mode) && !S_ISLNK(mode))
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (!S_ISREG(mode) && !S_ISDIR(mode))
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_UUID:
+	default:
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	}
+
+	/*
+	 * di_size.  xfs_dinode_verify checks for things that screw up
+	 * the VFS such as the upper bit being set and zero-length
+	 * symlinks/directories, but we can do more here.
+	 */
+	isize = be64_to_cpu(dip->di_size);
+	if (isize & (1ULL << 63))
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* Devices, fifos, and sockets must have zero size */
+	if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* Directories can't be larger than the data section size (32G) */
+	if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* Symlinks can't be larger than SYMLINK_MAXLEN */
+	if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/*
+	 * Warn if the running kernel can't handle the kinds of offsets
+	 * needed to deal with the file size.  In other words, if the
+	 * pagecache can't cache all the blocks in this file due to
+	 * overly large offsets, flag the inode for admin review.
+	 */
+	if (isize >= mp->m_super->s_maxbytes)
+		xfs_scrub_ino_set_warning(sc, ino, bp);
+
+	/* di_nblocks */
+	if (flags2 & XFS_DIFLAG2_REFLINK) {
+		; /* nblocks can exceed dblocks */
+	} else if (flags & XFS_DIFLAG_REALTIME) {
+		/*
+		 * nblocks is the sum of data extents (in the rtdev),
+		 * attr extents (in the datadev), and both forks' bmbt
+		 * blocks (in the datadev).  This clumsy check is the
+		 * best we can do without cross-referencing with the
+		 * inode forks.
+		 */
+		if (be64_to_cpu(dip->di_nblocks) >=
+		    mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	} else {
+		if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	}
+
+	xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags);
+
+	xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags);
+
+	/* di_nextents */
+	nextents = be32_to_cpu(dip->di_nextents);
+	fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
+	switch (dip->di_format) {
+	case XFS_DINODE_FMT_EXTENTS:
+		if (nextents > fork_recs)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (nextents <= fork_recs)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	default:
+		if (nextents != 0)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	}
+
+	/* di_forkoff */
+	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* di_aformat */
+	if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
+	    dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
+	    dip->di_aformat != XFS_DINODE_FMT_BTREE)
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
+	/* di_anextents */
+	nextents = be16_to_cpu(dip->di_anextents);
+	fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
+	switch (dip->di_aformat) {
+	case XFS_DINODE_FMT_EXTENTS:
+		if (nextents > fork_recs)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (nextents <= fork_recs)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		break;
+	default:
+		if (nextents != 0)
+			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	}
+
+	if (dip->di_version >= 3) {
+		xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
+		xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
+				flags2);
+	}
+}
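
/*
 * The di_nextents/di_anextents rule applied twice above, restated as
 * one hypothetical predicate (a sketch, not kernel API): an
 * EXTENTS-format fork must fit all of its records in the inline fork
 * area, while a BTREE-format fork only makes sense once that area has
 * overflowed.
 */
#include <stdbool.h>
#include <stdint.h>

static bool nextents_ok(bool format_is_btree, uint32_t nextents,
			uint32_t fork_bytes, uint32_t rec_size)
{
	uint32_t fork_recs = fork_bytes / rec_size;

	return format_is_btree ? nextents > fork_recs
			       : nextents <= fork_recs;
}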
+
+/* Map and read a raw inode. */
+STATIC int
+xfs_scrub_inode_map_raw(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			**bpp,
+	struct xfs_dinode		**dipp)
+{
+	struct xfs_imap			imap;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_dinode		*dip;
+	int				error;
+
+	error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
+	if (error == -EINVAL) {
+		/*
+		 * Inode could have gotten deleted out from under us;
+		 * just forget about it.
+		 */
+		error = -ENOENT;
+		goto out;
+	}
+	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+			XFS_INO_TO_AGBNO(mp, ino), &error))
+		goto out;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
+			NULL);
+	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+			XFS_INO_TO_AGBNO(mp, ino), &error))
+		goto out;
+
+	/*
+	 * Is this really an inode?  We disabled verifiers in the above
+	 * xfs_trans_read_buf call because the inode buffer verifier
+	 * fails on /any/ inode record in the inode cluster with a bad
+	 * magic or version number, not just the one that we're
+	 * checking.  Therefore, grab the buffer unconditionally, attach
+	 * the inode verifiers by hand, and run the inode verifier only
+	 * on the one inode we want.
+	 */
+	bp->b_ops = &xfs_inode_buf_ops;
+	dip = xfs_buf_offset(bp, imap.im_boffset);
+	if (!xfs_dinode_verify(mp, ino, dip) ||
+	    !xfs_dinode_good_version(mp, dip->di_version)) {
+		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		goto out_buf;
+	}
+
+	/* ...and is it the one we asked for? */
+	if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
+		error = -ENOENT;
+		goto out_buf;
+	}
+
+	*dipp = dip;
+	*bpp = bp;
+out:
+	return error;
+out_buf:
+	xfs_trans_brelse(sc->tp, bp);
+	return error;
+}
+
+/* Scrub an inode. */
+int
+xfs_scrub_inode(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_dinode		di;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_dinode		*dip;
+	xfs_ino_t			ino;
+
+	bool				has_shared;
+	int				error = 0;
+
+	/* Did we get the in-core inode, or are we doing this manually? */
+	if (sc->ip) {
+		ino = sc->ip->i_ino;
+		xfs_inode_to_disk(sc->ip, &di, 0);
+		dip = &di;
+	} else {
+		/* Map & read inode. */
+		ino = sc->sm->sm_ino;
+		error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip);
+		if (error || !bp)
+			goto out;
+	}
+
+	xfs_scrub_dinode(sc, bp, dip, ino);
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Now let's do the things that require a live inode. */
+	if (!sc->ip)
+		goto out;
+
+	/*
+	 * Does this inode have the reflink flag set but no shared extents?
+	 * Set the preening flag if this is the case.
+	 */
+	if (xfs_is_reflink_inode(sc->ip)) {
+		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
+				&has_shared);
+		if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+				XFS_INO_TO_AGBNO(mp, ino), &error))
+			goto out;
+		if (!has_shared)
+			xfs_scrub_ino_set_preen(sc, ino, bp);
+	}
+
+out:
+	if (bp)
+		xfs_trans_brelse(sc->tp, bp);
+	return error;
+}

+ 317 - 0
fs/xfs/scrub/parent.c

@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_ialloc.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Set us up to scrub parents. */
+int
+xfs_scrub_setup_parent(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+}
+
+/* Parent pointers */
+
+/* Look for an entry in a parent pointing to this inode. */
+
+struct xfs_scrub_parent_ctx {
+	struct dir_context		dc;
+	xfs_ino_t			ino;
+	xfs_nlink_t			nlink;
+};
+
+/* Look for a single entry in a directory pointing to an inode. */
+STATIC int
+xfs_scrub_parent_actor(
+	struct dir_context		*dc,
+	const char			*name,
+	int				namelen,
+	loff_t				pos,
+	u64				ino,
+	unsigned			type)
+{
+	struct xfs_scrub_parent_ctx	*spc;
+
+	spc = container_of(dc, struct xfs_scrub_parent_ctx, dc);
+	if (spc->ino == ino)
+		spc->nlink++;
+	return 0;
+}
+
+/* Count the number of dentries in the parent dir that point to this inode. */
+STATIC int
+xfs_scrub_parent_count_parent_dentries(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*parent,
+	xfs_nlink_t			*nlink)
+{
+	struct xfs_scrub_parent_ctx	spc = {
+		.dc.actor = xfs_scrub_parent_actor,
+		.dc.pos = 0,
+		.ino = sc->ip->i_ino,
+		.nlink = 0,
+	};
+	size_t				bufsize;
+	loff_t				oldpos;
+	uint				lock_mode;
+	int				error = 0;
+
+	/*
+	 * If there are any blocks, read-ahead block 0 as we're almost
+	 * certain to have the next operation be a read there.  This is
+	 * how we guarantee that the parent's extent map has been loaded,
+	 * if there is one.
+	 */
+	lock_mode = xfs_ilock_data_map_shared(parent);
+	if (parent->i_d.di_nextents > 0)
+		error = xfs_dir3_data_readahead(parent, 0, -1);
+	xfs_iunlock(parent, lock_mode);
+	if (error)
+		return error;
+
+	/*
+	 * Iterate the parent dir to confirm that there is
+	 * exactly one entry pointing back to the inode being
+	 * scanned.
+	 */
+	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
+			parent->i_d.di_size);
+	oldpos = 0;
+	while (true) {
+		error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
+		if (error)
+			goto out;
+		if (oldpos == spc.dc.pos)
+			break;
+		oldpos = spc.dc.pos;
+	}
+	*nlink = spc.nlink;
+out:
+	return error;
+}
+
+/*
+ * Given the inode number of the alleged parent of the inode being
+ * scrubbed, try to validate that the parent has exactly one directory
+ * entry pointing back to the inode being scrubbed.
+ */
+STATIC int
+xfs_scrub_parent_validate(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			dnum,
+	bool				*try_again)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*dp = NULL;
+	xfs_nlink_t			expected_nlink;
+	xfs_nlink_t			nlink;
+	int				error = 0;
+
+	*try_again = false;
+
+	/* '..' must not point to ourselves. */
+	if (sc->ip->i_ino == dnum) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	/*
+	 * If we're an unlinked directory, the parent /won't/ have a link
+	 * to us.  Otherwise, it should have one link.
+	 */
+	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
+
+	/*
+	 * Grab this parent inode.  We release the inode before we
+	 * cancel the scrub transaction.  Since we don't know a
+	 * priori that releasing the inode won't trigger eofblocks
+	 * cleanup (which allocates what would be a nested transaction)
+	 * if the parent pointer erroneously points to a file, we
+	 * can't use DONTCACHE here because DONTCACHE inodes can trigger
+	 * immediate inactive cleanup of the inode.
+	 */
+	error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out;
+	if (dp == sc->ip) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out_rele;
+	}
+
+	/*
+	 * We prefer to keep the inode locked while we lock and search
+	 * its alleged parent for a forward reference.  If we can grab
+	 * the iolock, validate the pointers and we're done.  We must
+	 * use nowait here to avoid an ABBA deadlock on the parent and
+	 * the child inodes.
+	 */
+	if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
+		error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+				&error))
+			goto out_unlock;
+		if (nlink != expected_nlink)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out_unlock;
+	}
+
+	/*
+	 * The game changes if we get here.  We failed to lock the parent,
+	 * so we're going to try to verify both pointers while only holding
+	 * one lock so as to avoid deadlocking with something that's actually
+	 * trying to traverse down the directory tree.
+	 */
+	xfs_iunlock(sc->ip, sc->ilock_flags);
+	sc->ilock_flags = 0;
+	xfs_ilock(dp, XFS_IOLOCK_SHARED);
+
+	/* Go looking for our dentry. */
+	error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out_unlock;
+
+	/* Drop the parent lock, relock this inode. */
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+	sc->ilock_flags = XFS_IOLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	/*
+	 * If we're an unlinked directory, the parent /won't/ have a link
+	 * to us.  Otherwise, it should have one link.  We have to re-set
+	 * it here because we dropped the lock on sc->ip.
+	 */
+	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
+
+	/* Look up '..' to see if the inode changed. */
+	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out_rele;
+
+	/* Drat, parent changed.  Try again! */
+	if (dnum != dp->i_ino) {
+		iput(VFS_I(dp));
+		*try_again = true;
+		return 0;
+	}
+	iput(VFS_I(dp));
+
+	/*
+	 * '..' didn't change, so check that there was only one entry
+	 * for us in the parent.
+	 */
+	if (nlink != expected_nlink)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+	return error;
+
+out_unlock:
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+out_rele:
+	iput(VFS_I(dp));
+out:
+	return error;
+}
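
/*
 * A minimal userspace sketch of the trylock-then-revalidate pattern
 * used above to dodge ABBA deadlocks; pthread mutexes stand in for
 * the XFS iolocks and all names are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>

static bool check_parent(pthread_mutex_t *child, pthread_mutex_t *parent,
			 bool (*check)(void), bool (*still_same_parent)(void))
{
	bool ok;

	pthread_mutex_lock(child);
	if (pthread_mutex_trylock(parent) == 0) {
		/* Fast path: both held, no lock-order inversion possible. */
		ok = check();
		pthread_mutex_unlock(parent);
		pthread_mutex_unlock(child);
		return ok;
	}

	/*
	 * Slow path: drop the child lock so a normal parent->child
	 * traversal can make progress, and check while holding only
	 * the parent lock...
	 */
	pthread_mutex_unlock(child);
	pthread_mutex_lock(parent);
	ok = check();
	pthread_mutex_unlock(parent);

	/*
	 * ...then retake the child and confirm the parent didn't change
	 * while we weren't holding it, as the '..' re-lookup does above.
	 */
	pthread_mutex_lock(child);
	ok = ok && still_same_parent();
	pthread_mutex_unlock(child);
	return ok;
}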
+
+/* Scrub a parent pointer. */
+int
+xfs_scrub_parent(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_ino_t			dnum;
+	bool				try_again;
+	int				tries = 0;
+	int				error = 0;
+
+	/*
+	 * If we're a directory, check that the '..' link points up to
+	 * a directory that has one entry pointing to us.
+	 */
+	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
+		return -ENOENT;
+
+	/* We're not a special inode, are we? */
+	if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	/*
+	 * The VFS grabs a read or write lock via i_rwsem before it reads
+	 * or writes to a directory.  If we've gotten this far we've
+	 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
+	 * getting a write lock on i_rwsem.  Therefore, it is safe for us
+	 * to drop the ILOCK here in order to do directory lookups.
+	 */
+	sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
+	xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
+	/* Look up '..' */
+	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out;
+	if (!xfs_verify_dir_ino(mp, dnum)) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	/* Is this the root dir?  Then '..' must point to itself. */
+	if (sc->ip == mp->m_rootip) {
+		if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
+		    sc->ip->i_ino != dnum)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	do {
+		error = xfs_scrub_parent_validate(sc, dnum, &try_again);
+		if (error)
+			goto out;
+	} while (try_again && ++tries < 20);
+
+	/*
+	 * We gave it our best shot but failed, so mark this scrub
+	 * incomplete.  Userspace can decide if it wants to try again.
+	 */
+	if (try_again && tries == 20)
+		xfs_scrub_set_incomplete(sc);
+out:
+	return error;
+}

+ 304 - 0
fs/xfs/scrub/quota.c

@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_dquot.h"
+#include "xfs_dquot_item.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Convert a scrub type code to a DQ flag, or return 0 if error. */
+static inline uint
+xfs_scrub_quota_to_dqtype(
+	struct xfs_scrub_context	*sc)
+{
+	switch (sc->sm->sm_type) {
+	case XFS_SCRUB_TYPE_UQUOTA:
+		return XFS_DQ_USER;
+	case XFS_SCRUB_TYPE_GQUOTA:
+		return XFS_DQ_GROUP;
+	case XFS_SCRUB_TYPE_PQUOTA:
+		return XFS_DQ_PROJ;
+	default:
+		return 0;
+	}
+}
+
+/* Set us up to scrub a quota. */
+int
+xfs_scrub_setup_quota(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	uint				dqtype;
+
+	/*
+	 * If userspace gave us an AG number or inode data, they don't
+	 * know what they're doing.  Get out.
+	 */
+	if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
+		return -EINVAL;
+
+	dqtype = xfs_scrub_quota_to_dqtype(sc);
+	if (dqtype == 0)
+		return -EINVAL;
+	if (!xfs_this_quota_on(sc->mp, dqtype))
+		return -ENOENT;
+	return 0;
+}
+
+/* Quotas. */
+
+/* Scrub the fields in an individual quota item. */
+STATIC void
+xfs_scrub_quota_item(
+	struct xfs_scrub_context	*sc,
+	uint				dqtype,
+	struct xfs_dquot		*dq,
+	xfs_dqid_t			id)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_disk_dquot		*d = &dq->q_core;
+	struct xfs_quotainfo		*qi = mp->m_quotainfo;
+	xfs_fileoff_t			offset;
+	unsigned long long		bsoft;
+	unsigned long long		isoft;
+	unsigned long long		rsoft;
+	unsigned long long		bhard;
+	unsigned long long		ihard;
+	unsigned long long		rhard;
+	unsigned long long		bcount;
+	unsigned long long		icount;
+	unsigned long long		rcount;
+	xfs_ino_t			fs_icount;
+
+	offset = id / qi->qi_dqperchunk;
+
+	/*
+	 * We fed $id and DQNEXT into the xfs_qm_dqget call, which means
+	 * that the actual dquot we got must either have the same id or
+	 * the next higher id.
+	 */
+	if (id > be32_to_cpu(d->d_id))
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	/* Did we get the dquot type we wanted? */
+	if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES))
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0))
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	/* Check the limits. */
+	bhard = be64_to_cpu(d->d_blk_hardlimit);
+	ihard = be64_to_cpu(d->d_ino_hardlimit);
+	rhard = be64_to_cpu(d->d_rtb_hardlimit);
+
+	bsoft = be64_to_cpu(d->d_blk_softlimit);
+	isoft = be64_to_cpu(d->d_ino_softlimit);
+	rsoft = be64_to_cpu(d->d_rtb_softlimit);
+
+	/*
+	 * Warn if the hard limits are larger than the fs.
+	 * Administrators can do this, though in production this seems
+	 * suspect, which is why we flag it for review.
+	 *
+	 * Complain about corruption if the soft limit is greater than
+	 * the hard limit.
+	 */
+	if (bhard > mp->m_sb.sb_dblocks)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+	if (bsoft > bhard)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	if (ihard > mp->m_maxicount)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+	if (isoft > ihard)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	if (rhard > mp->m_sb.sb_rblocks)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+	if (rsoft > rhard)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	/* Check the resource counts. */
+	bcount = be64_to_cpu(d->d_bcount);
+	icount = be64_to_cpu(d->d_icount);
+	rcount = be64_to_cpu(d->d_rtbcount);
+	fs_icount = percpu_counter_sum(&mp->m_icount);
+
+	/*
+	 * Check that usage doesn't exceed physical limits.  However, on
+	 * a reflink filesystem we're allowed to exceed physical space
+	 * if there are no quota limits.
+	 */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		if (mp->m_sb.sb_dblocks < bcount)
+			xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK,
+					offset);
+	} else {
+		if (mp->m_sb.sb_dblocks < bcount)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+					offset);
+	}
+	if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+	/*
+	 * We can violate the hard limits if the admin suddenly sets a
+	 * lower limit than the actual usage.  However, we flag it for
+	 * admin review.
+	 */
+	if (id != 0 && bhard != 0 && bcount > bhard)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+	if (id != 0 && ihard != 0 && icount > ihard)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+	if (id != 0 && rhard != 0 && rcount > rhard)
+		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+}
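
/*
 * A sketch of the limit policy above, with hypothetical names: a soft
 * limit above the hard limit is self-contradictory (corrupt), while a
 * hard limit above the filesystem's physical size is merely suspect
 * (warn, for administrator review).
 */
enum verdict { LIMIT_OK, LIMIT_WARN, LIMIT_CORRUPT };

static enum verdict check_limit_pair(unsigned long long soft,
				     unsigned long long hard,
				     unsigned long long fs_max)
{
	if (soft > hard)
		return LIMIT_CORRUPT;
	if (hard > fs_max)
		return LIMIT_WARN;
	return LIMIT_OK;
}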
+
+/* Scrub all of a quota type's items. */
+int
+xfs_scrub_quota(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_bmbt_irec		irec = { 0 };
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip;
+	struct xfs_quotainfo		*qi = mp->m_quotainfo;
+	struct xfs_dquot		*dq;
+	xfs_fileoff_t			max_dqid_off;
+	xfs_fileoff_t			off = 0;
+	xfs_dqid_t			id = 0;
+	uint				dqtype;
+	int				nimaps;
+	int				error;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return -ENOENT;
+
+	mutex_lock(&qi->qi_quotaofflock);
+	dqtype = xfs_scrub_quota_to_dqtype(sc);
+	if (!xfs_this_quota_on(sc->mp, dqtype)) {
+		error = -ENOENT;
+		goto out_unlock_quota;
+	}
+
+	/* Attach to the quota inode and set sc->ip so that reporting works. */
+	ip = xfs_quota_inode(sc->mp, dqtype);
+	sc->ip = ip;
+
+	/* Look for problem extents. */
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		goto out_unlock_inode;
+	}
+	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
+	while (1) {
+		if (xfs_scrub_should_terminate(sc, &error))
+			break;
+
+		off = irec.br_startoff + irec.br_blockcount;
+		nimaps = 1;
+		error = xfs_bmapi_read(ip, off, -1, &irec, &nimaps,
+				XFS_BMAPI_ENTIRE);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, off,
+				&error))
+			goto out_unlock_inode;
+		if (!nimaps)
+			break;
+		if (irec.br_startblock == HOLESTARTBLOCK)
+			continue;
+
+		/* Check the extent record doesn't point to crap. */
+		if (irec.br_startblock + irec.br_blockcount <=
+		    irec.br_startblock)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+					irec.br_startoff);
+		if (!xfs_verify_fsbno(mp, irec.br_startblock) ||
+		    !xfs_verify_fsbno(mp, irec.br_startblock +
+					irec.br_blockcount - 1))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+					irec.br_startoff);
+
+		/*
+		 * Unwritten extents or blocks mapped above the highest
+		 * quota id shouldn't happen.
+		 */
+		if (isnullstartblock(irec.br_startblock) ||
+		    irec.br_startoff > max_dqid_off ||
+		    irec.br_startoff + irec.br_blockcount > max_dqid_off + 1)
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
+	/* Check all the quota items. */
+	while (id < ((xfs_dqid_t)-1ULL)) {
+		if (xfs_scrub_should_terminate(sc, &error))
+			break;
+
+		error = xfs_qm_dqget(mp, NULL, id, dqtype, XFS_QMOPT_DQNEXT,
+				&dq);
+		if (error == -ENOENT)
+			break;
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK,
+				id / qi->qi_dqperchunk, &error))
+			break;
+
+		xfs_scrub_quota_item(sc, dqtype, dq, id);
+
+		id = be32_to_cpu(dq->q_core.d_id) + 1;
+		xfs_qm_dqput(dq);
+		if (!id)
+			break;
+	}
+
+out:
+	/* We set sc->ip earlier, so make sure we clear it now. */
+	sc->ip = NULL;
+out_unlock_quota:
+	mutex_unlock(&qi->qi_quotaofflock);
+	return error;
+
+out_unlock_inode:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	goto out;
+}
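
/*
 * A sketch of the sparse-id walk above: XFS_QMOPT_DQNEXT asks for
 * "this id or the next allocated one", so the loop resumes just past
 * whatever it got back and stops once the 32-bit id space wraps.
 * next_allocated() is a hypothetical stand-in for xfs_qm_dqget().
 */
#include <stdbool.h>
#include <stdint.h>

static void walk_ids(bool (*next_allocated)(uint32_t id, uint32_t *found),
		     void (*visit)(uint32_t id))
{
	uint32_t id = 0;
	uint32_t found;

	while (next_allocated(id, &found)) {
		visit(found);
		id = found + 1;
		if (id == 0)	/* wrapped past the last possible id */
			break;
	}
}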

+ 99 - 0
fs/xfs/scrub/refcount.c

@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/*
+ * Set us up to scrub reference count btrees.
+ */
+int
+xfs_scrub_setup_ag_refcountbt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_ag_btree(sc, ip, false);
+}
+
+/* Reference count btree scrubber. */
+
+/* Scrub a refcountbt record. */
+STATIC int
+xfs_scrub_refcountbt_rec(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
+	xfs_agblock_t			bno;
+	xfs_extlen_t			len;
+	xfs_nlink_t			refcount;
+	bool				has_cowflag;
+	int				error = 0;
+
+	bno = be32_to_cpu(rec->refc.rc_startblock);
+	len = be32_to_cpu(rec->refc.rc_blockcount);
+	refcount = be32_to_cpu(rec->refc.rc_refcount);
+
+	/* Only CoW records can have refcount == 1. */
+	has_cowflag = (bno & XFS_REFC_COW_START);
+	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	/* Check the extent. */
+	bno &= ~XFS_REFC_COW_START;
+	if (bno + len <= bno ||
+	    !xfs_verify_agbno(mp, agno, bno) ||
+	    !xfs_verify_agbno(mp, agno, bno + len - 1))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (refcount == 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	return error;
+}
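
/*
 * The CoW-staging rule above is an equivalence: a refcount record
 * describes a CoW staging extent exactly when its refcount is 1.
 * A hypothetical helper equivalent to the two-clause test above.
 */
#include <stdbool.h>
#include <stdint.h>

static bool cow_flag_ok(bool has_cowflag, uint32_t refcount)
{
	return has_cowflag == (refcount == 1);
}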
+
+/* Scrub the refcount btree for some AG. */
+int
+xfs_scrub_refcountbt(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_owner_info		oinfo;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
+	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+			&oinfo, NULL);
+}

+ 138 - 0
fs/xfs/scrub/rmap.c

@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/*
+ * Set us up to scrub reverse mapping btrees.
+ */
+int
+xfs_scrub_setup_ag_rmapbt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_setup_ag_btree(sc, ip, false);
+}
+
+/* Reverse-mapping scrubber. */
+
+/* Scrub an rmapbt record. */
+STATIC int
+xfs_scrub_rmapbt_rec(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_rmap_irec		irec;
+	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
+	bool				non_inode;
+	bool				is_unwritten;
+	bool				is_bmbt;
+	bool				is_attr;
+	int				error;
+
+	error = xfs_rmap_btrec_to_irec(rec, &irec);
+	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
+		goto out;
+
+	/* Check extent. */
+	if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (irec.rm_owner == XFS_RMAP_OWN_FS) {
+		/*
+		 * xfs_verify_agbno returns false for static fs metadata.
+		 * Since that only exists at the start of the AG, validate
+		 * that by hand.
+		 */
+		if (irec.rm_startblock != 0 ||
+		    irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	} else {
+		/*
+		 * Otherwise we must point somewhere past the static metadata
+		 * but before the end of the FS.  Run the regular check.
+		 */
+		if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) ||
+		    !xfs_verify_agbno(mp, agno, irec.rm_startblock +
+				irec.rm_blockcount - 1))
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	}
+
+	/* Check flags. */
+	non_inode = XFS_RMAP_NON_INODE_OWNER(irec.rm_owner);
+	is_bmbt = irec.rm_flags & XFS_RMAP_BMBT_BLOCK;
+	is_attr = irec.rm_flags & XFS_RMAP_ATTR_FORK;
+	is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN;
+
+	if (is_bmbt && irec.rm_offset != 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (non_inode && irec.rm_offset != 0)
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (is_unwritten && (is_bmbt || non_inode || is_attr))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (non_inode && (is_bmbt || is_unwritten || is_attr))
+		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+	if (!non_inode) {
+		if (!xfs_verify_ino(mp, irec.rm_owner))
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	} else {
+		/* Non-inode owner within the magic values? */
+		if (irec.rm_owner <= XFS_RMAP_OWN_MIN ||
+		    irec.rm_owner > XFS_RMAP_OWN_FS)
+			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	}
+out:
+	return error;
+}
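
/*
 * The rmap flag rules above, restated as one hypothetical predicate
 * with plain booleans standing in for the on-disk flag bits.
 */
#include <stdbool.h>
#include <stdint.h>

static bool rmap_flags_ok(bool non_inode, bool is_bmbt, bool is_attr,
			  bool is_unwritten, uint64_t offset)
{
	if ((is_bmbt || non_inode) && offset != 0)
		return false;	/* bmbt blocks and non-inode owners carry no offset */
	if (is_unwritten && (is_bmbt || non_inode || is_attr))
		return false;	/* only plain data extents can be unwritten */
	if (non_inode && (is_bmbt || is_unwritten || is_attr))
		return false;	/* non-inode owners have no fork state at all */
	return true;
}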
+
+/* Scrub the rmap btree for some AG. */
+int
+xfs_scrub_rmapbt(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_owner_info		oinfo;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
+			&oinfo, NULL);
+}

+ 108 - 0
fs/xfs/scrub/rtbitmap.c

@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Set us up with the realtime metadata locked. */
+int
+xfs_scrub_setup_rt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error = 0;
+
+	/*
+	 * If userspace gave us an AG number or inode data, they don't
+	 * know what they're doing.  Get out.
+	 */
+	if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
+		return -EINVAL;
+
+	error = xfs_scrub_setup_fs(sc, ip);
+	if (error)
+		return error;
+
+	sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
+	sc->ip = mp->m_rbmip;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	return 0;
+}
+
+/* Realtime bitmap. */
+
+/* Scrub a free extent record from the realtime bitmap. */
+STATIC int
+xfs_scrub_rtbitmap_rec(
+	struct xfs_trans		*tp,
+	struct xfs_rtalloc_rec		*rec,
+	void				*priv)
+{
+	struct xfs_scrub_context	*sc = priv;
+
+	if (rec->ar_startblock + rec->ar_blockcount <= rec->ar_startblock ||
+	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock) ||
+	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock +
+			rec->ar_blockcount - 1))
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+	return 0;
+}
+
+/* Scrub the realtime bitmap. */
+int
+xfs_scrub_rtbitmap(
+	struct xfs_scrub_context	*sc)
+{
+	int				error;
+
+	error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out;
+
+out:
+	return error;
+}
+
+/* Scrub the realtime summary. */
+int
+xfs_scrub_rtsummary(
+	struct xfs_scrub_context	*sc)
+{
+	/* XXX: implement this some day */
+	return -ENOENT;
+}

+ 392 - 0
fs/xfs/scrub/scrub.c

@@ -0,0 +1,392 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/scrub.h"
+#include "scrub/btree.h"
+
+/*
+ * Online Scrub and Repair
+ *
+ * Traditionally, XFS (the kernel driver) did not know how to check or
+ * repair on-disk data structures.  That task was left to the xfs_check
+ * and xfs_repair tools, both of which require taking the filesystem
+ * offline for a thorough but time consuming examination.  Online
+ * scrub & repair, on the other hand, enables us to check the metadata
+ * for obvious errors while carefully stepping around the filesystem's
+ * ongoing operations, locking rules, etc.
+ *
+ * Given that most XFS metadata consist of records stored in a btree,
+ * most of the checking functions iterate the btree blocks themselves
+ * looking for irregularities.  When a record block is encountered, each
+ * record can be checked for obviously bad values.  Record values can
+ * also be cross-referenced against other btrees to look for potential
+ * misunderstandings between pieces of metadata.
+ *
+ * It is expected that the checkers responsible for per-AG metadata
+ * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
+ * metadata structure, and perform any relevant cross-referencing before
+ * unlocking the AG and returning the results to userspace.  These
+ * scrubbers must not keep an AG locked for too long to avoid tying up
+ * the block and inode allocators.
+ *
+ * Block maps and b-trees rooted in an inode present a special challenge
+ * because they can involve extents from any AG.  The general scrubber
+ * structure of lock -> check -> xref -> unlock still holds, but AG
+ * locking order rules /must/ be obeyed to avoid deadlocks.  The
+ * ordering rule, of course, is that we must lock in increasing AG
+ * order.  Helper functions are provided to track which AG headers we've
+ * already locked.  If we detect an imminent locking order violation, we
+ * can signal a potential deadlock, in which case the scrubber can jump
+ * out to the top level, lock all the AGs in order, and retry the scrub.
+ *
+ * For file data (directories, extended attributes, symlinks) scrub, we
+ * can simply lock the inode and walk the data.  For btree data
+ * (directories and attributes) we follow the same btree-scrubbing
+ * strategy outlined previously to check the records.
+ *
+ * We use a bit of trickery with transactions to avoid buffer deadlocks
+ * if there is a cycle in the metadata.  The basic problem is that
+ * travelling down a btree involves locking the current buffer at each
+ * tree level.  If a pointer should somehow point back to a buffer that
+ * we've already examined, we will deadlock due to the second buffer
+ * locking attempt.  Note however that grabbing a buffer in transaction
+ * context links the locked buffer to the transaction.  If we try to
+ * re-grab the buffer in the context of the same transaction, we avoid
+ * the second lock attempt and continue.  Between the verifier and the
+ * scrubber, something will notice that something is amiss and report
+ * the corruption.  Therefore, each scrubber will allocate an empty
+ * transaction, attach buffers to it, and cancel the transaction at the
+ * end of the scrub run.  Cancelling a non-dirty transaction simply
+ * unlocks the buffers.
+ *
+ * There are four pieces of data that scrub can communicate to
+ * userspace.  The first is the error code (errno), which can be used to
+ * communicate operational errors in performing the scrub.  There are
+ * also three flags that can be set in the scrub context.  If the data
+ * structure itself is corrupt, the CORRUPT flag will be set.  If the
+ * metadata is correct but otherwise suboptimal, the PREEN flag will be
+ * set.  If the metadata merits administrative review but is not
+ * actually corrupt, the WARNING flag will be set.
+ */
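
/*
 * A condensed sketch of the empty-transaction trick described above;
 * error handling is elided and the block address is a placeholder, so
 * this is illustrative rather than a complete scrubber.
 */
static void
xfs_scrub_walk_sketch(
	struct xfs_mount	*mp,
	xfs_daddr_t		blkno,
	int			numblks,
	const struct xfs_buf_ops *ops)
{
	struct xfs_trans	*tp;
	struct xfs_buf		*bp;

	/* Empty transactions take no log reservation. */
	xfs_trans_alloc_empty(mp, &tp);

	/*
	 * Buffers read here are joined to tp; re-grabbing a buffer that
	 * tp already holds returns the same locked buffer instead of
	 * deadlocking on its lock.
	 */
	xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno, numblks,
			0, &bp, ops);

	/* ... walk the metadata, possibly revisiting blocks ... */

	/* Cancelling the still-clean transaction just unlocks buffers. */
	xfs_trans_cancel(tp);
}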
+
+/*
+ * Scrub probe -- userspace uses this to probe if we're willing to scrub
+ * or repair a given mountpoint.  This will be used by xfs_scrub to
+ * probe the kernel's abilities to scrub (and repair) the metadata.  We
+ * do this by validating the ioctl inputs from userspace, preparing the
+ * filesystem for a scrub (or a repair) operation, and immediately
+ * returning to userspace.  Userspace can use the returned errno and
+ * structure state to decide (in broad terms) if scrub/repair are
+ * supported by the running kernel.
+ */
+static int
+xfs_scrub_probe(
+	struct xfs_scrub_context	*sc)
+{
+	int				error = 0;
+
+	if (sc->sm->sm_ino || sc->sm->sm_agno)
+		return -EINVAL;
+	if (xfs_scrub_should_terminate(sc, &error))
+		return error;
+
+	return 0;
+}
+
+/* Scrub setup and teardown */
+
+/* Free all the resources and finish the transactions. */
+STATIC int
+xfs_scrub_teardown(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip_in,
+	int				error)
+{
+	xfs_scrub_ag_free(sc, &sc->sa);
+	if (sc->tp) {
+		xfs_trans_cancel(sc->tp);
+		sc->tp = NULL;
+	}
+	if (sc->ip) {
+		xfs_iunlock(sc->ip, sc->ilock_flags);
+		if (sc->ip != ip_in &&
+		    !xfs_internal_inum(sc->mp, sc->ip->i_ino))
+			iput(VFS_I(sc->ip));
+		sc->ip = NULL;
+	}
+	if (sc->buf) {
+		kmem_free(sc->buf);
+		sc->buf = NULL;
+	}
+	return error;
+}
+
+/* Scrubbing dispatch. */
+
+static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
+	{ /* ioctl presence test */
+		.setup	= xfs_scrub_setup_fs,
+		.scrub	= xfs_scrub_probe,
+	},
+	{ /* superblock */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_superblock,
+	},
+	{ /* agf */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_agf,
+	},
+	{ /* agfl */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_agfl,
+	},
+	{ /* agi */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_agi,
+	},
+	{ /* bnobt */
+		.setup	= xfs_scrub_setup_ag_allocbt,
+		.scrub	= xfs_scrub_bnobt,
+	},
+	{ /* cntbt */
+		.setup	= xfs_scrub_setup_ag_allocbt,
+		.scrub	= xfs_scrub_cntbt,
+	},
+	{ /* inobt */
+		.setup	= xfs_scrub_setup_ag_iallocbt,
+		.scrub	= xfs_scrub_inobt,
+	},
+	{ /* finobt */
+		.setup	= xfs_scrub_setup_ag_iallocbt,
+		.scrub	= xfs_scrub_finobt,
+		.has	= xfs_sb_version_hasfinobt,
+	},
+	{ /* rmapbt */
+		.setup	= xfs_scrub_setup_ag_rmapbt,
+		.scrub	= xfs_scrub_rmapbt,
+		.has	= xfs_sb_version_hasrmapbt,
+	},
+	{ /* refcountbt */
+		.setup	= xfs_scrub_setup_ag_refcountbt,
+		.scrub	= xfs_scrub_refcountbt,
+		.has	= xfs_sb_version_hasreflink,
+	},
+	{ /* inode record */
+		.setup	= xfs_scrub_setup_inode,
+		.scrub	= xfs_scrub_inode,
+	},
+	{ /* inode data fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_data,
+	},
+	{ /* inode attr fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_attr,
+	},
+	{ /* inode CoW fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_cow,
+	},
+	{ /* directory */
+		.setup	= xfs_scrub_setup_directory,
+		.scrub	= xfs_scrub_directory,
+	},
+	{ /* extended attributes */
+		.setup	= xfs_scrub_setup_xattr,
+		.scrub	= xfs_scrub_xattr,
+	},
+	{ /* symbolic link */
+		.setup	= xfs_scrub_setup_symlink,
+		.scrub	= xfs_scrub_symlink,
+	},
+	{ /* parent pointers */
+		.setup	= xfs_scrub_setup_parent,
+		.scrub	= xfs_scrub_parent,
+	},
+	{ /* realtime bitmap */
+		.setup	= xfs_scrub_setup_rt,
+		.scrub	= xfs_scrub_rtbitmap,
+		.has	= xfs_sb_version_hasrealtime,
+	},
+	{ /* realtime summary */
+		.setup	= xfs_scrub_setup_rt,
+		.scrub	= xfs_scrub_rtsummary,
+		.has	= xfs_sb_version_hasrealtime,
+	},
+	{ /* user quota */
+		.setup = xfs_scrub_setup_quota,
+		.scrub = xfs_scrub_quota,
+	},
+	{ /* group quota */
+		.setup = xfs_scrub_setup_quota,
+		.scrub = xfs_scrub_quota,
+	},
+	{ /* project quota */
+		.setup = xfs_scrub_setup_quota,
+		.scrub = xfs_scrub_quota,
+	},
+};
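
/*
 * A sketch of how userspace is expected to drive this table: sm_type
 * is the index into meta_scrub_ops, passed through the new
 * XFS_IOC_SCRUB_METADATA ioctl.  This assumes the uapi definitions
 * from this series are visible (e.g. via xfsprogs' xfs/xfs.h); the
 * helper name is hypothetical.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int scrub_one(int fd, unsigned int type, unsigned int agno)
{
	struct xfs_scrub_metadata sm;

	memset(&sm, 0, sizeof(sm));
	sm.sm_type = type;	/* index into meta_scrub_ops */
	sm.sm_agno = agno;	/* only used by the per-AG types */
	return ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);
}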
+
+/* This isn't a stable feature, warn once per day. */
+static inline void
+xfs_scrub_experimental_warning(
+	struct xfs_mount	*mp)
+{
+	static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT(
+			"xfs_scrub_warning", 86400 * HZ, 1);
+	ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);
+
+	if (__ratelimit(&scrub_warning))
+		xfs_alert(mp,
+"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+}
+
+/* Dispatch metadata scrubbing. */
+int
+xfs_scrub_metadata(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm)
+{
+	struct xfs_scrub_context	sc;
+	struct xfs_mount		*mp = ip->i_mount;
+	const struct xfs_scrub_meta_ops	*ops;
+	bool				try_harder = false;
+	int				error = 0;
+
+	trace_xfs_scrub_start(ip, sm, error);
+
+	/* Forbidden if we are shut down or mounted norecovery. */
+	error = -ESHUTDOWN;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		goto out;
+	error = -ENOTRECOVERABLE;
+	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+		goto out;
+
+	/* Check our inputs. */
+	error = -EINVAL;
+	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
+		goto out;
+	if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
+		goto out;
+
+	/* Do we know about this type of metadata? */
+	error = -ENOENT;
+	if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
+		goto out;
+	ops = &meta_scrub_ops[sm->sm_type];
+	if (ops->scrub == NULL)
+		goto out;
+
+	/*
+	 * We won't scrub any filesystem that doesn't have the ability
+	 * to record unwritten extents.  The option was made default in
+	 * 2003, removed from mkfs in 2007, and cannot be disabled in
+	 * v5, so if we find a filesystem without this flag it's either
+	 * really old or totally unsupported.  Avoid it either way.
+	 * We also don't support v1-v3 filesystems, which aren't
+	 * mountable.
+	 */
+	error = -EOPNOTSUPP;
+	if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
+		goto out;
+
+	/* Does this fs even support this type of metadata? */
+	error = -ENOENT;
+	if (ops->has && !ops->has(&mp->m_sb))
+		goto out;
+
+	/* We don't know how to repair anything yet. */
+	error = -EOPNOTSUPP;
+	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+		goto out;
+
+	xfs_scrub_experimental_warning(mp);
+
+retry_op:
+	/* Set up for the operation. */
+	memset(&sc, 0, sizeof(sc));
+	sc.mp = ip->i_mount;
+	sc.sm = sm;
+	sc.ops = ops;
+	sc.try_harder = try_harder;
+	sc.sa.agno = NULLAGNUMBER;
+	error = sc.ops->setup(&sc, ip);
+	if (error)
+		goto out_teardown;
+
+	/* Scrub for errors. */
+	error = sc.ops->scrub(&sc);
+	if (!try_harder && error == -EDEADLOCK) {
+		/*
+		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
+		 * Tear down everything we hold, then set up again with
+		 * preparation for worst-case scenarios.
+		 */
+		error = xfs_scrub_teardown(&sc, ip, 0);
+		if (error)
+			goto out;
+		try_harder = true;
+		goto retry_op;
+	} else if (error)
+		goto out_teardown;
+
+	if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+			       XFS_SCRUB_OFLAG_XCORRUPT))
+		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+
+out_teardown:
+	error = xfs_scrub_teardown(&sc, ip, error);
+out:
+	trace_xfs_scrub_done(ip, sm, error);
+	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
+		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+		error = 0;
+	}
+	return error;
+}

+ 115 - 0
fs/xfs/scrub/scrub.h

@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_SCRUB_H__
+#define __XFS_SCRUB_SCRUB_H__
+
+struct xfs_scrub_context;
+
+struct xfs_scrub_meta_ops {
+	/* Acquire whatever resources are needed for the operation. */
+	int		(*setup)(struct xfs_scrub_context *,
+				 struct xfs_inode *);
+
+	/* Examine metadata for errors. */
+	int		(*scrub)(struct xfs_scrub_context *);
+
+	/* Decide if we even have this piece of metadata. */
+	bool		(*has)(struct xfs_sb *);
+};
+
+/* Buffer pointers and btree cursors for an entire AG. */
+struct xfs_scrub_ag {
+	xfs_agnumber_t			agno;
+
+	/* AG btree roots */
+	struct xfs_buf			*agf_bp;
+	struct xfs_buf			*agfl_bp;
+	struct xfs_buf			*agi_bp;
+
+	/* AG btrees */
+	struct xfs_btree_cur		*bno_cur;
+	struct xfs_btree_cur		*cnt_cur;
+	struct xfs_btree_cur		*ino_cur;
+	struct xfs_btree_cur		*fino_cur;
+	struct xfs_btree_cur		*rmap_cur;
+	struct xfs_btree_cur		*refc_cur;
+};
+
+struct xfs_scrub_context {
+	/* General scrub state. */
+	struct xfs_mount		*mp;
+	struct xfs_scrub_metadata	*sm;
+	const struct xfs_scrub_meta_ops	*ops;
+	struct xfs_trans		*tp;
+	struct xfs_inode		*ip;
+	void				*buf;
+	uint				ilock_flags;
+	bool				try_harder;
+
+	/* State tracking for single-AG operations. */
+	struct xfs_scrub_ag		sa;
+};
+
+/* Metadata scrubbers */
+int xfs_scrub_tester(struct xfs_scrub_context *sc);
+int xfs_scrub_superblock(struct xfs_scrub_context *sc);
+int xfs_scrub_agf(struct xfs_scrub_context *sc);
+int xfs_scrub_agfl(struct xfs_scrub_context *sc);
+int xfs_scrub_agi(struct xfs_scrub_context *sc);
+int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
+int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
+int xfs_scrub_inobt(struct xfs_scrub_context *sc);
+int xfs_scrub_finobt(struct xfs_scrub_context *sc);
+int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
+int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
+int xfs_scrub_inode(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
+int xfs_scrub_directory(struct xfs_scrub_context *sc);
+int xfs_scrub_xattr(struct xfs_scrub_context *sc);
+int xfs_scrub_symlink(struct xfs_scrub_context *sc);
+int xfs_scrub_parent(struct xfs_scrub_context *sc);
+#ifdef CONFIG_XFS_RT
+int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc);
+int xfs_scrub_rtsummary(struct xfs_scrub_context *sc);
+#else
+static inline int
+xfs_scrub_rtbitmap(struct xfs_scrub_context *sc)
+{
+	return -ENOENT;
+}
+static inline int
+xfs_scrub_rtsummary(struct xfs_scrub_context *sc)
+{
+	return -ENOENT;
+}
+#endif
+#ifdef CONFIG_XFS_QUOTA
+int xfs_scrub_quota(struct xfs_scrub_context *sc);
+#else
+static inline int
+xfs_scrub_quota(struct xfs_scrub_context *sc)
+{
+	return -ENOENT;
+}
+#endif
+
+#endif	/* __XFS_SCRUB_SCRUB_H__ */
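
Each scrub type supplies one of these ops structures: ->setup acquires whatever locks and buffers the check needs, ->scrub does the checking, and ->has (when present) lets a scrubber opt out on filesystems that lack the feature. A sketch of a hypothetical scrubber wired to this vtable (illustration only; the real table of ops lives in scrub.c):

	/* Hypothetical example scrubber, for illustration. */
	static int
	xfs_scrub_setup_example(
		struct xfs_scrub_context	*sc,
		struct xfs_inode		*ip)
	{
		return 0;	/* nothing to allocate or lock */
	}

	static int
	xfs_scrub_example(
		struct xfs_scrub_context	*sc)
	{
		return 0;	/* inspect metadata; set sm_flags on problems */
	}

	static const struct xfs_scrub_meta_ops xfs_scrub_example_ops = {
		.setup	= xfs_scrub_setup_example,
		.scrub	= xfs_scrub_example,
		/* .has left NULL: this metadata exists on every filesystem */
	};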

+ 92 - 0
fs/xfs/scrub/symlink.c

@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_symlink.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Set us up to scrub a symbolic link. */
+int
+xfs_scrub_setup_symlink(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	/* Allocate the buffer without the inode lock held. */
+	sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP);
+	if (!sc->buf)
+		return -ENOMEM;
+
+	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+}
+
+/* Symbolic links. */
+
+int
+xfs_scrub_symlink(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_ifork		*ifp;
+	loff_t				len;
+	int				error = 0;
+
+	if (!S_ISLNK(VFS_I(ip)->i_mode))
+		return -ENOENT;
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	len = ip->i_d.di_size;
+
+	/* Plausible size? */
+	if (len > XFS_SYMLINK_MAXLEN || len <= 0) {
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	/* Inline symlink? */
+	if (ifp->if_flags & XFS_IFINLINE) {
+		if (len > XFS_IFORK_DSIZE(ip) ||
+		    len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip)))
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		goto out;
+	}
+
+	/* Remote symlink; must read the contents. */
+	error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+		goto out;
+	if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len)
+		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+out:
+	return error;
+}
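
The scrubber's core invariant is that a symlink body must be a NUL-free string of exactly di_size bytes, whether stored inline in the data fork or in remote blocks. The length test reduces to the following standalone check (plain C sketch, independent of the kernel headers):

	#include <string.h>

	/* Nonzero if a symlink body claiming 'len' bytes is corrupt. */
	static int symlink_body_corrupt(const char *buf, size_t len, size_t maxlen)
	{
		if (len == 0 || len > maxlen)
			return 1;			/* implausible size */
		return strnlen(buf, maxlen) < len;	/* embedded NUL cuts it short */
	}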

+ 59 - 0
fs/xfs/scrub/trace.c

@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_da_format.h"
+#include "xfs_defer.h"
(Note: the second "xfs_defer.h" include duplicates the one above and could be dropped.)
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+
+/* Figure out which block the btree cursor was pointing to. */
+static inline xfs_fsblock_t
+xfs_scrub_btree_cur_fsbno(
+	struct xfs_btree_cur		*cur,
+	int				level)
+{
+	if (level < cur->bc_nlevels && cur->bc_bufs[level])
+		return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn);
+	else if (level == cur->bc_nlevels - 1 &&
+		 cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_private.b.ip->i_ino);
+	else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS))
+		return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0);
+	return NULLFSBLOCK;
+}
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "scrub/trace.h"
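
This is the standard tracepoint layout: exactly one object file defines CREATE_TRACE_POINTS before including the trace header, which makes that translation unit emit the event bodies; every other file includes the header plainly and gets declarations only. For example:

	/* In any other scrub .c file -- declarations only, no event bodies: */
	#include "scrub/trace.h"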

+ 499 - 0
fs/xfs/scrub/trace.h

@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xfs_scrub
+
+#if !defined(_TRACE_XFS_SCRUB_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XFS_SCRUB_TRACE_H
+
+#include <linux/tracepoint.h>
+#include "xfs_bit.h"
+
+DECLARE_EVENT_CLASS(xfs_scrub_class,
+	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
+		 int error),
+	TP_ARGS(ip, sm, error),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(unsigned int, type)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, inum)
+		__field(unsigned int, gen)
+		__field(unsigned int, flags)
+		__field(int, error)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->type = sm->sm_type;
+		__entry->agno = sm->sm_agno;
+		__entry->inum = sm->sm_ino;
+		__entry->gen = sm->sm_gen;
+		__entry->flags = sm->sm_flags;
+		__entry->error = error;
+	),
+	TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->type,
+		  __entry->agno,
+		  __entry->inum,
+		  __entry->gen,
+		  __entry->flags,
+		  __entry->error)
+)
+#define DEFINE_SCRUB_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_class, name, \
+	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \
+		 int error), \
+	TP_ARGS(ip, sm, error))
+
+DEFINE_SCRUB_EVENT(xfs_scrub_start);
+DEFINE_SCRUB_EVENT(xfs_scrub_done);
+DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry);
+
+TRACE_EVENT(xfs_scrub_op_error,
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+		 xfs_agblock_t bno, int error, void *ret_ip),
+	TP_ARGS(sc, agno, bno, error, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, error)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__entry->error = error;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->error,
+		  __entry->ret_ip)
+);
+
+TRACE_EVENT(xfs_scrub_file_op_error,
+	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
+		 xfs_fileoff_t offset, int error, void *ret_ip),
+	TP_ARGS(sc, whichfork, offset, error, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(unsigned int, type)
+		__field(xfs_fileoff_t, offset)
+		__field(int, error)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = sc->ip->i_mount->m_super->s_dev;
+		__entry->ino = sc->ip->i_ino;
+		__entry->whichfork = whichfork;
+		__entry->type = sc->sm->sm_type;
+		__entry->offset = offset;
+		__entry->error = error;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->whichfork,
+		  __entry->type,
+		  __entry->offset,
+		  __entry->error,
+		  __entry->ret_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip),
+	TP_ARGS(sc, daddr, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t	fsbno;
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+
+		fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
+		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+		bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->ret_ip)
+)
+
+#define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_block_error_class, name, \
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \
+		 void *ret_ip), \
+	TP_ARGS(sc, daddr, ret_ip))
+
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
+
+DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr,
+		 void *ret_ip),
+	TP_ARGS(sc, ino, daddr, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(unsigned int, type)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t	fsbno;
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+
+		if (daddr) {
+			fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
+			agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+			bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+		} else {
+			agno = XFS_INO_TO_AGNO(sc->mp, ino);
+			bno = XFS_AGINO_TO_AGBNO(sc->mp,
+					XFS_INO_TO_AGINO(sc->mp, ino));
+		}
+
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->ino = ino;
+		__entry->type = sc->sm->sm_type;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->type,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->ret_ip)
+)
+
+#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
+		 xfs_daddr_t daddr, void *ret_ip), \
+	TP_ARGS(sc, ino, daddr, ret_ip))
+
+DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
+DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
+DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning);
+
+DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
+	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
+		 xfs_fileoff_t offset, void *ret_ip),
+	TP_ARGS(sc, whichfork, offset, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(unsigned int, type)
+		__field(xfs_fileoff_t, offset)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = sc->ip->i_mount->m_super->s_dev;
+		__entry->ino = sc->ip->i_ino;
+		__entry->whichfork = whichfork;
+		__entry->type = sc->sm->sm_type;
+		__entry->offset = offset;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->whichfork,
+		  __entry->type,
+		  __entry->offset,
+		  __entry->ret_ip)
+);
+
+#define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \
+	TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \
+		 xfs_fileoff_t offset, void *ret_ip), \
+	TP_ARGS(sc, whichfork, offset, ret_ip))
+
+DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error);
+DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning);
+
+TRACE_EVENT(xfs_scrub_incomplete,
+	TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip),
+	TP_ARGS(sc, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d type %u ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->ret_ip)
+);
+
+TRACE_EVENT(xfs_scrub_btree_op_error,
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		 int level, int error, void *ret_ip),
+	TP_ARGS(sc, cur, level, error, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(xfs_btnum_t, btnum)
+		__field(int, level)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, ptr);
+		__field(int, error)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->btnum = cur->bc_btnum;
+		__entry->level = level;
+		__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+		__entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+		__entry->ptr = cur->bc_ptrs[level];
+		__entry->error = error;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->btnum,
+		  __entry->level,
+		  __entry->ptr,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->error,
+		  __entry->ret_ip)
+);
+
+TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		 int level, int error, void *ret_ip),
+	TP_ARGS(sc, cur, level, error, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(unsigned int, type)
+		__field(xfs_btnum_t, btnum)
+		__field(int, level)
+		__field(int, ptr)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, error)
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->ino = sc->ip->i_ino;
+		__entry->whichfork = cur->bc_private.b.whichfork;
+		__entry->type = sc->sm->sm_type;
+		__entry->btnum = cur->bc_btnum;
+		__entry->level = level;
+		__entry->ptr = cur->bc_ptrs[level];
+		__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+		__entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+		__entry->error = error;
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->whichfork,
+		  __entry->type,
+		  __entry->btnum,
+		  __entry->level,
+		  __entry->ptr,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->error,
+		  __entry->ret_ip)
+);
+
+TRACE_EVENT(xfs_scrub_btree_error,
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		 int level, void *ret_ip),
+	TP_ARGS(sc, cur, level, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(xfs_btnum_t, btnum)
+		__field(int, level)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, ptr);
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->btnum = cur->bc_btnum;
+		__entry->level = level;
+		__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+		__entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+		__entry->ptr = cur->bc_ptrs[level];
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->btnum,
+		  __entry->level,
+		  __entry->ptr,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->ret_ip)
+);
+
+TRACE_EVENT(xfs_scrub_ifork_btree_error,
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		 int level, void *ret_ip),
+	TP_ARGS(sc, cur, level, ret_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(unsigned int, type)
+		__field(xfs_btnum_t, btnum)
+		__field(int, level)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, ptr);
+		__field(void *, ret_ip)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->ino = sc->ip->i_ino;
+		__entry->whichfork = cur->bc_private.b.whichfork;
+		__entry->type = sc->sm->sm_type;
+		__entry->btnum = cur->bc_btnum;
+		__entry->level = level;
+		__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+		__entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+		__entry->ptr = cur->bc_ptrs[level];
+		__entry->ret_ip = ret_ip;
+	),
+	TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->whichfork,
+		  __entry->type,
+		  __entry->btnum,
+		  __entry->level,
+		  __entry->ptr,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->ret_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		 int level),
+	TP_ARGS(sc, cur, level),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, type)
+		__field(xfs_btnum_t, btnum)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, level)
+		__field(int, nlevels)
+		__field(int, ptr)
+	),
+	TP_fast_assign(
+		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->btnum = cur->bc_btnum;
+		__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+		__entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+		__entry->level = level;
+		__entry->nlevels = cur->bc_nlevels;
+		__entry->ptr = cur->bc_ptrs[level];
+	),
+	TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->btnum,
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->level,
+		  __entry->nlevels,
+		  __entry->ptr)
+)
+#define DEFINE_SCRUB_SBTREE_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
+	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \
+		 int level), \
+	TP_ARGS(sc, cur, level))
+
+DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
+DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
+
+#endif /* _TRACE_XFS_SCRUB_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE scrub/trace
+#include <trace/define_trace.h>
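
Grouping events into classes keeps additions cheap: a new event in xfs_scrub_class needs one DEFINE line here plus the generated trace_* call at the call site. A hypothetical example (this event does not exist in the series; it is shown only to illustrate the macro):

	DEFINE_SCRUB_EVENT(xfs_scrub_example_retry);

	/* ... and at the call site: */
	trace_xfs_scrub_example_retry(ip, sm, error);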

+ 29 - 0
fs/xfs/scrub/xfs_scrub.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_H__
+#define __XFS_SCRUB_H__
+
+#ifndef CONFIG_XFS_ONLINE_SCRUB
+# define xfs_scrub_metadata(ip, sm)	(-ENOTTY)
+#else
+int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
+#endif /* CONFIG_XFS_ONLINE_SCRUB */
+
+#endif	/* __XFS_SCRUB_H__ */
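
When CONFIG_XFS_ONLINE_SCRUB is disabled, the entry point collapses to a macro that ignores its arguments and evaluates to -ENOTTY, so call sites compile unchanged either way. Illustrative caller:

	error = xfs_scrub_metadata(ip, sm);	/* -ENOTTY if scrub isn't built in */
	if (error)
		return error;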

+ 0 - 1
fs/xfs/xfs.h

@@ -19,7 +19,6 @@
 #define __XFS_H__
 
 #ifdef CONFIG_XFS_DEBUG
-#define STATIC
 #define DEBUG 1
 #define XFS_BUF_LOCK_TRACKING 1
 #endif

+ 4 - 1
fs/xfs/xfs_attr.h

@@ -48,6 +48,8 @@ struct xfs_attr_list_context;
 #define ATTR_KERNOTIME	0x1000	/* [kernel] don't update inode timestamps */
 #define ATTR_KERNOVAL	0x2000	/* [kernel] get attr size only, not value */
 
+#define ATTR_INCOMPLETE	0x4000	/* [kernel] return INCOMPLETE attr keys */
+
 #define XFS_ATTR_FLAGS \
 	{ ATTR_DONTFOLLOW, 	"DONTFOLLOW" }, \
 	{ ATTR_ROOT,		"ROOT" }, \
@@ -56,7 +58,8 @@ struct xfs_attr_list_context;
 	{ ATTR_CREATE,		"CREATE" }, \
 	{ ATTR_REPLACE,		"REPLACE" }, \
 	{ ATTR_KERNOTIME,	"KERNOTIME" }, \
-	{ ATTR_KERNOVAL,	"KERNOVAL" }
+	{ ATTR_KERNOVAL,	"KERNOVAL" }, \
+	{ ATTR_INCOMPLETE,	"INCOMPLETE" }
 
 /*
  * The maximum size (into the kernel or returned from the kernel) of an

+ 33 - 36
fs/xfs/xfs_attr_inactive.c

@@ -251,47 +251,44 @@ xfs_attr3_node_inactive(
 		 * traversal of the tree so we may deal with many blocks
 		 * before we come back to this one.
 		 */
-		error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
-						XFS_ATTR_FORK);
+		error = xfs_da3_node_read(*trans, dp, child_fsb, -1, &child_bp,
+					  XFS_ATTR_FORK);
 		if (error)
 			return error;
-		if (child_bp) {
-						/* save for re-read later */
-			child_blkno = XFS_BUF_ADDR(child_bp);
 
-			/*
-			 * Invalidate the subtree, however we have to.
-			 */
-			info = child_bp->b_addr;
-			switch (info->magic) {
-			case cpu_to_be16(XFS_DA_NODE_MAGIC):
-			case cpu_to_be16(XFS_DA3_NODE_MAGIC):
-				error = xfs_attr3_node_inactive(trans, dp,
-							child_bp, level + 1);
-				break;
-			case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
-			case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
-				error = xfs_attr3_leaf_inactive(trans, dp,
-							child_bp);
-				break;
-			default:
-				error = -EIO;
-				xfs_trans_brelse(*trans, child_bp);
-				break;
-			}
-			if (error)
-				return error;
+		/* save for re-read later */
+		child_blkno = XFS_BUF_ADDR(child_bp);
 
-			/*
-			 * Remove the subsidiary block from the cache
-			 * and from the log.
-			 */
-			error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
-				&child_bp, XFS_ATTR_FORK);
-			if (error)
-				return error;
-			xfs_trans_binval(*trans, child_bp);
+		/*
+		 * Invalidate the subtree, however we have to.
+		 */
+		info = child_bp->b_addr;
+		switch (info->magic) {
+		case cpu_to_be16(XFS_DA_NODE_MAGIC):
+		case cpu_to_be16(XFS_DA3_NODE_MAGIC):
+			error = xfs_attr3_node_inactive(trans, dp, child_bp,
+							level + 1);
+			break;
+		case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
+		case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
+			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
+			break;
+		default:
+			error = -EIO;
+			xfs_trans_brelse(*trans, child_bp);
+			break;
 		}
+		if (error)
+			return error;
+
+		/*
+		 * Remove the subsidiary block from the cache and from the log.
+		 */
+		error = xfs_da_get_buf(*trans, dp, 0, child_blkno, &child_bp,
+				       XFS_ATTR_FORK);
+		if (error)
+			return error;
+		xfs_trans_binval(*trans, child_bp);
 
 		/*
 		 * If we're not done, re-read the parent to get the next

+ 106 - 55
fs/xfs/xfs_attr_list.c

@@ -204,19 +204,103 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	return 0;
 }
 
+/*
+ * We didn't find the block & hash mentioned in the cursor state, so
+ * walk down the attr btree looking for the hash.
+ */
 STATIC int
-xfs_attr_node_list(xfs_attr_list_context_t *context)
+xfs_attr_node_list_lookup(
+	struct xfs_attr_list_context	*context,
+	struct attrlist_cursor_kern	*cursor,
+	struct xfs_buf			**pbp)
 {
-	attrlist_cursor_kern_t *cursor;
-	xfs_attr_leafblock_t *leaf;
-	xfs_da_intnode_t *node;
-	struct xfs_attr3_icleaf_hdr leafhdr;
-	struct xfs_da3_icnode_hdr nodehdr;
-	struct xfs_da_node_entry *btree;
-	int error, i;
-	struct xfs_buf *bp;
-	struct xfs_inode	*dp = context->dp;
-	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_da3_icnode_hdr	nodehdr;
+	struct xfs_da_intnode		*node;
+	struct xfs_da_node_entry	*btree;
+	struct xfs_inode		*dp = context->dp;
+	struct xfs_mount		*mp = dp->i_mount;
+	struct xfs_trans		*tp = context->tp;
+	struct xfs_buf			*bp;
+	int				i;
+	int				error = 0;
+	unsigned int			expected_level = 0;
+	uint16_t			magic;
+
+	ASSERT(*pbp == NULL);
+	cursor->blkno = 0;
+	for (;;) {
+		error = xfs_da3_node_read(tp, dp, cursor->blkno, -1, &bp,
+				XFS_ATTR_FORK);
+		if (error)
+			return error;
+		node = bp->b_addr;
+		magic = be16_to_cpu(node->hdr.info.magic);
+		if (magic == XFS_ATTR_LEAF_MAGIC ||
+		    magic == XFS_ATTR3_LEAF_MAGIC)
+			break;
+		if (magic != XFS_DA_NODE_MAGIC &&
+		    magic != XFS_DA3_NODE_MAGIC) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					node);
+			goto out_corruptbuf;
+		}
+
+		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+
+		/* Tree taller than we can handle; bail out! */
+		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
+			goto out_corruptbuf;
+
+		/* Check the level from the root node. */
+		if (cursor->blkno == 0)
+			expected_level = nodehdr.level - 1;
+		else if (expected_level != nodehdr.level)
+			goto out_corruptbuf;
+		else
+			expected_level--;
+
+		btree = dp->d_ops->node_tree_p(node);
+		for (i = 0; i < nodehdr.count; btree++, i++) {
+			if (cursor->hashval <= be32_to_cpu(btree->hashval)) {
+				cursor->blkno = be32_to_cpu(btree->before);
+				trace_xfs_attr_list_node_descend(context,
+						btree);
+				break;
+			}
+		}
+		xfs_trans_brelse(tp, bp);
+
+		if (i == nodehdr.count)
+			return 0;
+
+		/* We can't point back to the root. */
+		if (cursor->blkno == 0)
+			return -EFSCORRUPTED;
+	}
+
+	if (expected_level != 0)
+		goto out_corruptbuf;
+
+	*pbp = bp;
+	return 0;
+
+out_corruptbuf:
+	xfs_trans_brelse(tp, bp);
+	return -EFSCORRUPTED;
+}
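
The expected_level bookkeeping guards against cycles and grafted subtrees: the root fixes how many levels should remain, every interior node visited must match that count exactly as it decrements, and the walk must reach a leaf with zero levels left. The same guard in standalone form (plain C sketch):

	/* Returns 0 while the descent is consistent, -1 on a corrupt level. */
	static int check_level(unsigned int *expected, unsigned int level,
			       int at_root)
	{
		if (at_root) {
			*expected = level - 1;	/* levels remaining below the root */
			return 0;
		}
		if (*expected != level)
			return -1;		/* node is at the wrong depth */
		(*expected)--;
		return 0;
	}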
+
+STATIC int
+xfs_attr_node_list(
+	struct xfs_attr_list_context	*context)
+{
+	struct xfs_attr3_icleaf_hdr	leafhdr;
+	struct attrlist_cursor_kern	*cursor;
+	struct xfs_attr_leafblock	*leaf;
+	struct xfs_da_intnode		*node;
+	struct xfs_buf			*bp;
+	struct xfs_inode		*dp = context->dp;
+	struct xfs_mount		*mp = dp->i_mount;
+	int				error;
 
 	trace_xfs_attr_node_list(context);
 
@@ -277,47 +361,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 * Note that start of node block is same as start of leaf block.
 	 */
 	if (bp == NULL) {
-		cursor->blkno = 0;
-		for (;;) {
-			uint16_t magic;
-
-			error = xfs_da3_node_read(context->tp, dp,
-						      cursor->blkno, -1, &bp,
-						      XFS_ATTR_FORK);
-			if (error)
-				return error;
-			node = bp->b_addr;
-			magic = be16_to_cpu(node->hdr.info.magic);
-			if (magic == XFS_ATTR_LEAF_MAGIC ||
-			    magic == XFS_ATTR3_LEAF_MAGIC)
-				break;
-			if (magic != XFS_DA_NODE_MAGIC &&
-			    magic != XFS_DA3_NODE_MAGIC) {
-				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
-						     XFS_ERRLEVEL_LOW,
-						     context->dp->i_mount,
-						     node);
-				xfs_trans_brelse(context->tp, bp);
-				return -EFSCORRUPTED;
-			}
-
-			dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-			btree = dp->d_ops->node_tree_p(node);
-			for (i = 0; i < nodehdr.count; btree++, i++) {
-				if (cursor->hashval
-						<= be32_to_cpu(btree->hashval)) {
-					cursor->blkno = be32_to_cpu(btree->before);
-					trace_xfs_attr_list_node_descend(context,
-									 btree);
-					break;
-				}
-			}
-			if (i == nodehdr.count) {
-				xfs_trans_brelse(context->tp, bp);
-				return 0;
-			}
-			xfs_trans_brelse(context->tp, bp);
-		}
+		error = xfs_attr_node_list_lookup(context, cursor, &bp);
+		if (error || !bp)
+			return error;
 	}
 	ASSERT(bp != NULL);
 
@@ -407,7 +453,8 @@ xfs_attr3_leaf_list_int(
 			cursor->offset = 0;
 		}
 
-		if (entry->flags & XFS_ATTR_INCOMPLETE)
+		if ((entry->flags & XFS_ATTR_INCOMPLETE) &&
+		    !(context->flags & ATTR_INCOMPLETE))
 			continue;		/* skip incomplete entries */
 
 		if (entry->flags & XFS_ATTR_LOCAL) {
@@ -499,8 +546,8 @@ xfs_attr_list_int(
 #define	ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
 	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
 #define	ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
-	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
-	 & ~(sizeof(u_int32_t)-1))
+	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(uint32_t)-1) \
+	 & ~(sizeof(uint32_t)-1))
 
 /*
  * Format an attribute and copy it out to the user's buffer.
@@ -583,6 +630,10 @@ xfs_attr_list(
 	    (cursor->hashval || cursor->blkno || cursor->offset))
 		return -EINVAL;
 
+	/* Only internal consumers can retrieve incomplete attrs. */
+	if (flags & ATTR_INCOMPLETE)
+		return -EINVAL;
+
 	/*
 	 * Check for a properly aligned buffer.
 	 */
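
ATTR_ENTSIZE pads each returned entry (header, name, trailing NUL) up to a uint32_t boundary: with a 4-byte-aligned base and namelen == 5, base + 5 + 1 = base + 6 rounds up to base + 8. Standalone form of the same arithmetic:

	#include <stddef.h>
	#include <stdint.h>

	static size_t attr_entsize(size_t base, size_t namelen)
	{
		/* name bytes + NUL, rounded up to a multiple of sizeof(uint32_t) */
		return (base + namelen + 1 + sizeof(uint32_t) - 1) &
		       ~(sizeof(uint32_t) - 1);
	}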

+ 304 - 442
fs/xfs/xfs_bmap_util.c

@@ -229,15 +229,17 @@ xfs_bmap_count_leaves(
 	struct xfs_ifork	*ifp,
 	xfs_filblks_t		*count)
 {
+	struct xfs_iext_cursor	icur;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		numrecs = 0, i = 0;
+	xfs_extnum_t		numrecs = 0;
 
-	while (xfs_iext_get_extent(ifp, i++, &got)) {
+	for_each_xfs_iext(ifp, &icur, &got) {
 		if (!isnullstartblock(got.br_startblock)) {
 			*count += got.br_blockcount;
 			numrecs++;
 		}
 	}
+
 	return numrecs;
 }
 
@@ -405,125 +407,103 @@ xfs_bmap_count_blocks(
 	return 0;
 }
 
-/*
- * returns 1 for success, 0 if we failed to map the extent.
- */
-STATIC int
-xfs_getbmapx_fix_eof_hole(
-	xfs_inode_t		*ip,		/* xfs incore inode pointer */
-	int			whichfork,
-	struct getbmapx		*out,		/* output structure */
-	int			prealloced,	/* this is a file with
-						 * preallocated data space */
-	int64_t			end,		/* last block requested */
-	xfs_fsblock_t		startblock,
-	bool			moretocome)
+static int
+xfs_getbmap_report_one(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct kgetbmap		*out,
+	int64_t			bmv_end,
+	struct xfs_bmbt_irec	*got)
 {
-	int64_t			fixlen;
-	xfs_mount_t		*mp;		/* file system mount point */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
-	xfs_extnum_t		lastx;		/* last extent pointer */
-	xfs_fileoff_t		fileblock;
-
-	if (startblock == HOLESTARTBLOCK) {
-		mp = ip->i_mount;
-		out->bmv_block = -1;
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
-		fixlen -= out->bmv_offset;
-		if (prealloced && out->bmv_offset + out->bmv_length == end) {
-			/* Came to hole at EOF. Trim it. */
-			if (fixlen <= 0)
-				return 0;
-			out->bmv_length = fixlen;
-		}
+	struct kgetbmap		*p = out + bmv->bmv_entries;
+	bool			shared = false, trimmed = false;
+	int			error;
+
+	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
+	if (error)
+		return error;
+
+	if (isnullstartblock(got->br_startblock) ||
+	    got->br_startblock == DELAYSTARTBLOCK) {
+		/*
+		 * Delalloc extents that start beyond EOF can occur due to
+		 * speculative EOF allocation when the delalloc extent is larger
+		 * than the largest freespace extent at conversion time.  These
+		 * extents cannot be converted by data writeback, so can exist
+		 * here even if we are not supposed to be finding delalloc
+		 * extents.
+		 */
+		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
+			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
+
+		p->bmv_oflags |= BMV_OF_DELALLOC;
+		p->bmv_block = -2;
 	} else {
-		if (startblock == DELAYSTARTBLOCK)
-			out->bmv_block = -2;
-		else
-			out->bmv_block = xfs_fsb_to_db(ip, startblock);
-		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
-		ifp = XFS_IFORK_PTR(ip, whichfork);
-		if (!moretocome &&
-		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
-		   (lastx == xfs_iext_count(ifp) - 1))
-			out->bmv_oflags |= BMV_OF_LAST;
+		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
 	}
 
-	return 1;
+	if (got->br_state == XFS_EXT_UNWRITTEN &&
+	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
+		p->bmv_oflags |= BMV_OF_PREALLOC;
+
+	if (shared)
+		p->bmv_oflags |= BMV_OF_SHARED;
+
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
+
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+	return 0;
 }
 
-/* Adjust the reported bmap around shared/unshared extent transitions. */
-STATIC int
-xfs_getbmap_adjust_shared(
-	struct xfs_inode		*ip,
-	int				whichfork,
-	struct xfs_bmbt_irec		*map,
-	struct getbmapx			*out,
-	struct xfs_bmbt_irec		*next_map)
+static void
+xfs_getbmap_report_hole(
+	struct xfs_inode	*ip,
+	struct getbmapx		*bmv,
+	struct kgetbmap		*out,
+	int64_t			bmv_end,
+	xfs_fileoff_t		bno,
+	xfs_fileoff_t		end)
 {
-	struct xfs_mount		*mp = ip->i_mount;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_agblock_t			ebno;
-	xfs_extlen_t			elen;
-	xfs_extlen_t			nlen;
-	int				error;
+	struct kgetbmap		*p = out + bmv->bmv_entries;
 
-	next_map->br_startblock = NULLFSBLOCK;
-	next_map->br_startoff = NULLFILEOFF;
-	next_map->br_blockcount = 0;
+	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
+		return;
 
-	/* Only written data blocks can be shared. */
-	if (!xfs_is_reflink_inode(ip) ||
-	    whichfork != XFS_DATA_FORK ||
-	    !xfs_bmap_is_real_extent(map))
-		return 0;
+	p->bmv_block = -1;
+	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
+	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
 
-	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
-	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
-	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
-			map->br_blockcount, &ebno, &elen, true);
-	if (error)
-		return error;
+	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+	bmv->bmv_entries++;
+}
 
-	if (ebno == NULLAGBLOCK) {
-		/* No shared blocks at all. */
-		return 0;
-	} else if (agbno == ebno) {
-		/*
-		 * Shared extent at (agbno, elen).  Shrink the reported
-		 * extent length and prepare to move the start of map[i]
-		 * to agbno+elen, with the aim of (re)formatting the new
-		 * map[i] the next time through the inner loop.
-		 */
-		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
-		out->bmv_oflags |= BMV_OF_SHARED;
-		if (elen != map->br_blockcount) {
-			*next_map = *map;
-			next_map->br_startblock += elen;
-			next_map->br_startoff += elen;
-			next_map->br_blockcount -= elen;
-		}
-		map->br_blockcount -= elen;
-	} else {
-		/*
-		 * There's an unshared extent (agbno, ebno - agbno)
-		 * followed by shared extent at (ebno, elen).  Shrink
-		 * the reported extent length to cover only the unshared
-		 * extent and prepare to move up the start of map[i] to
-		 * ebno, with the aim of (re)formatting the new map[i]
-		 * the next time through the inner loop.
-		 */
-		*next_map = *map;
-		nlen = ebno - agbno;
-		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
-		next_map->br_startblock += nlen;
-		next_map->br_startoff += nlen;
-		next_map->br_blockcount -= nlen;
-		map->br_blockcount -= nlen;
-	}
+static inline bool
+xfs_getbmap_full(
+	struct getbmapx		*bmv)
+{
+	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
+}
 
-	return 0;
+static bool
+xfs_getbmap_next_rec(
+	struct xfs_bmbt_irec	*rec,
+	xfs_fileoff_t		total_end)
+{
+	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;
+
+	if (end == total_end)
+		return false;
+
+	rec->br_startoff += rec->br_blockcount;
+	if (!isnullstartblock(rec->br_startblock) &&
+	    rec->br_startblock != DELAYSTARTBLOCK)
+		rec->br_startblock += rec->br_blockcount;
+	rec->br_blockcount = total_end - end;
+	return true;
 }
 
 /*
@@ -535,33 +515,22 @@ xfs_getbmap_adjust_shared(
  */
 int						/* error code */
 xfs_getbmap(
-	xfs_inode_t		*ip,
+	struct xfs_inode	*ip,
 	struct getbmapx		*bmv,		/* user bmap structure */
-	xfs_bmap_format_t	formatter,	/* format to user */
-	void			*arg)		/* formatter arg */
+	struct kgetbmap		*out)
 {
-	int64_t			bmvend;		/* last block requested */
-	int			error = 0;	/* return value */
-	int64_t			fixlen;		/* length for -1 case */
-	int			i;		/* extent number */
-	int			lock;		/* lock state */
-	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
-	xfs_mount_t		*mp;		/* file system mount point */
-	int			nex;		/* # of user extents can do */
-	int			subnex;		/* # of bmapi's can do */
-	int			nmap;		/* number of map entries */
-	struct getbmapx		*out;		/* output structure */
-	int			whichfork;	/* data or attr fork */
-	int			prealloced;	/* this is a file with
-						 * preallocated data space */
-	int			iflags;		/* interface flags */
-	int			bmapi_flags;	/* flags for xfs_bmapi */
-	int			cur_ext = 0;
-	struct xfs_bmbt_irec	inject_map;
-
-	mp = ip->i_mount;
-	iflags = bmv->bmv_iflags;
-
+	struct xfs_mount	*mp = ip->i_mount;
+	int			iflags = bmv->bmv_iflags;
+	int			whichfork, lock, error = 0;
+	int64_t			bmv_end, max_len;
+	xfs_fileoff_t		bno, first_bno;
+	struct xfs_ifork	*ifp;
+	struct xfs_bmbt_irec	got, rec;
+	xfs_filblks_t		len;
+	struct xfs_iext_cursor	icur;
+
+	if (bmv->bmv_iflags & ~BMV_IF_VALID)
+		return -EINVAL;
 #ifndef DEBUG
 	/* Only allow CoW fork queries if we're debugging. */
 	if (iflags & BMV_IF_COWFORK)
@@ -570,89 +539,42 @@ xfs_getbmap(
 	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
 		return -EINVAL;
 
+	if (bmv->bmv_length < -1)
+		return -EINVAL;
+	bmv->bmv_entries = 0;
+	if (bmv->bmv_length == 0)
+		return 0;
+
 	if (iflags & BMV_IF_ATTRFORK)
 		whichfork = XFS_ATTR_FORK;
 	else if (iflags & BMV_IF_COWFORK)
 		whichfork = XFS_COW_FORK;
 	else
 		whichfork = XFS_DATA_FORK;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
 
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	switch (whichfork) {
 	case XFS_ATTR_FORK:
-		if (XFS_IFORK_Q(ip)) {
-			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
-				return -EINVAL;
-		} else if (unlikely(
-			   ip->i_d.di_aformat != 0 &&
-			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
-			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
-					 ip->i_mount);
-			return -EFSCORRUPTED;
-		}
+		if (!XFS_IFORK_Q(ip))
+			goto out_unlock_iolock;
 
-		prealloced = 0;
-		fixlen = 1LL << 32;
+		max_len = 1LL << 32;
+		lock = xfs_ilock_attr_map_shared(ip);
 		break;
 	case XFS_COW_FORK:
-		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
-			return -EINVAL;
+		/* No CoW fork? Just return */
+		if (!ifp)
+			goto out_unlock_iolock;
 
-		if (xfs_get_cowextsz_hint(ip)) {
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
-		break;
-	default:
-		/* Local format data forks report no extents. */
-		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-			bmv->bmv_entries = 0;
-			return 0;
-		}
-		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
-			return -EINVAL;
+		if (xfs_get_cowextsz_hint(ip))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
 
-		if (xfs_get_extsz_hint(ip) ||
-		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
+		lock = XFS_ILOCK_SHARED;
+		xfs_ilock(ip, lock);
 		break;
-	}
-
-	if (bmv->bmv_length == -1) {
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length =
-			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
-	} else if (bmv->bmv_length == 0) {
-		bmv->bmv_entries = 0;
-		return 0;
-	} else if (bmv->bmv_length < 0) {
-		return -EINVAL;
-	}
-
-	nex = bmv->bmv_count - 1;
-	if (nex <= 0)
-		return -EINVAL;
-	bmvend = bmv->bmv_offset + bmv->bmv_length;
-
-
-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
-		return -ENOMEM;
-	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
-	if (!out)
-		return -ENOMEM;
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	switch (whichfork) {
 	case XFS_DATA_FORK:
 		if (!(iflags & BMV_IF_DELALLOC) &&
 		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@@ -670,154 +592,105 @@ xfs_getbmap(
 			 */
 		}
 
+		if (xfs_get_extsz_hint(ip) ||
+		    (ip->i_d.di_flags &
+		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+			max_len = mp->m_super->s_maxbytes;
+		else
+			max_len = XFS_ISIZE(ip);
+
 		lock = xfs_ilock_data_map_shared(ip);
 		break;
-	case XFS_COW_FORK:
-		lock = XFS_ILOCK_SHARED;
-		xfs_ilock(ip, lock);
-		break;
-	case XFS_ATTR_FORK:
-		lock = xfs_ilock_attr_map_shared(ip);
-		break;
 	}
 
-	/*
-	 * Don't let nex be bigger than the number of extents
-	 * we can have assuming alternating holes and real extents.
-	 */
-	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
-		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
-
-	bmapi_flags = xfs_bmapi_aflag(whichfork);
-	if (!(iflags & BMV_IF_PREALLOC))
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-	/*
-	 * Allocate enough space to handle "subnex" maps at a time.
-	 */
-	error = -ENOMEM;
-	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
-	if (!map)
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		/* Local format inode forks report no extents. */
 		goto out_unlock_ilock;
+	default:
+		error = -EINVAL;
+		goto out_unlock_ilock;
+	}
 
-	bmv->bmv_entries = 0;
-
-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
-	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
-		error = 0;
-		goto out_free_map;
+	if (bmv->bmv_length == -1) {
+		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
+		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
 	}
 
-	do {
-		nmap = (nex> subnex) ? subnex : nex;
-		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
-				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
-				       map, &nmap, bmapi_flags);
-		if (error)
-			goto out_free_map;
-		ASSERT(nmap <= subnex);
-
-		for (i = 0; i < nmap && bmv->bmv_length &&
-				cur_ext < bmv->bmv_count - 1; i++) {
-			out[cur_ext].bmv_oflags = 0;
-			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
-			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
-			out[cur_ext].bmv_offset =
-				XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out[cur_ext].bmv_length =
-				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out[cur_ext].bmv_unused1 = 0;
-			out[cur_ext].bmv_unused2 = 0;
+	bmv_end = bmv->bmv_offset + bmv->bmv_length;
 
-			/*
-			 * delayed allocation extents that start beyond EOF can
-			 * occur due to speculative EOF allocation when the
-			 * delalloc extent is larger than the largest freespace
-			 * extent at conversion time. These extents cannot be
-			 * converted by data writeback, so can exist here even
-			 * if we are not supposed to be finding delalloc
-			 * extents.
-			 */
-			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
-				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
-
-                        if (map[i].br_startblock == HOLESTARTBLOCK &&
-			    whichfork == XFS_ATTR_FORK) {
-				/* came to the end of attribute fork */
-				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
-				goto out_free_map;
-			}
+	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
+	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
 
-			/* Is this a shared block? */
-			error = xfs_getbmap_adjust_shared(ip, whichfork,
-					&map[i], &out[cur_ext], &inject_map);
-			if (error)
-				goto out_free_map;
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, whichfork);
+		if (error)
+			goto out_unlock_ilock;
+	}
 
-			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
-					&out[cur_ext], prealloced, bmvend,
-					map[i].br_startblock,
-					inject_map.br_startblock != NULLFSBLOCK))
-				goto out_free_map;
+	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
+		/*
+		 * Report a whole-file hole if the delalloc flag is set to
+		 * stay compatible with the old implementation.
+		 */
+		if (iflags & BMV_IF_DELALLOC)
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
+		goto out_unlock_ilock;
+	}
 
-			bmv->bmv_offset =
-				out[cur_ext].bmv_offset +
-				out[cur_ext].bmv_length;
-			bmv->bmv_length =
-				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
+	while (!xfs_getbmap_full(bmv)) {
+		xfs_trim_extent(&got, first_bno, len);
 
-			/*
-			 * In case we don't want to return the hole,
-			 * don't increase cur_ext so that we can reuse
-			 * it in the next loop.
-			 */
-			if ((iflags & BMV_IF_NO_HOLES) &&
-			    map[i].br_startblock == HOLESTARTBLOCK) {
-				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
-				continue;
-			}
+		/*
+		 * Report an entry for a hole if this extent doesn't directly
+		 * follow the previous one.
+		 */
+		if (got.br_startoff > bno) {
+			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
+					got.br_startoff);
+			if (xfs_getbmap_full(bmv))
+				break;
+		}
 
-			/*
-			 * In order to report shared extents accurately,
-			 * we report each distinct shared/unshared part
-			 * of a single bmbt record using multiple bmap
-			 * extents.  To make that happen, we iterate the
-			 * same map array item multiple times, each
-			 * time trimming out the subextent that we just
-			 * reported.
-			 *
-			 * Because of this, we must check the out array
-			 * index (cur_ext) directly against bmv_count-1
-			 * to avoid overflows.
-			 */
-			if (inject_map.br_startblock != NULLFSBLOCK) {
-				map[i] = inject_map;
-				i--;
+		/*
+		 * In order to report shared extents accurately, we report each
+		 * distinct shared / unshared part of a single bmbt record with
+		 * an individual getbmapx record.
+		 */
+		bno = got.br_startoff + got.br_blockcount;
+		rec = got;
+		do {
+			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
+					&rec);
+			if (error || xfs_getbmap_full(bmv))
+				goto out_unlock_ilock;
+		} while (xfs_getbmap_next_rec(&rec, bno));
+
+		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
+			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
+
+			if (whichfork != XFS_ATTR_FORK && bno < end &&
+			    !xfs_getbmap_full(bmv)) {
+				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
+						bno, end);
 			}
-			bmv->bmv_entries++;
-			cur_ext++;
+			break;
 		}
-	} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
 
- out_free_map:
-	kmem_free(map);
- out_unlock_ilock:
-	xfs_iunlock(ip, lock);
- out_unlock_iolock:
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-	for (i = 0; i < cur_ext; i++) {
-		/* format results & advance arg */
-		error = formatter(&arg, &out[i]);
-		if (error)
+		if (bno >= first_bno + len)
 			break;
 	}
 
-	kmem_free(out);
+out_unlock_ilock:
+	xfs_iunlock(ip, lock);
+out_unlock_iolock:
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return error;
 }
 
@@ -1389,53 +1262,12 @@ out:
 
 }
 
-/*
- * @next_fsb will keep track of the extent currently undergoing shift.
- * @stop_fsb will keep track of the extent at which we have to stop.
- * If we are shifting left, we will start with block (offset + len) and
- * shift each extent till last extent.
- * If we are shifting right, we will start with last extent inside file space
- * and continue until we reach the block corresponding to offset.
- */
 static int
-xfs_shift_file_space(
-	struct xfs_inode        *ip,
-	xfs_off_t               offset,
-	xfs_off_t               len,
-	enum shift_direction	direction)
+xfs_prepare_shift(
+	struct xfs_inode	*ip,
+	loff_t			offset)
 {
-	int			done = 0;
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
 	int			error;
-	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
-	xfs_fileoff_t		stop_fsb;
-	xfs_fileoff_t		next_fsb;
-	xfs_fileoff_t		shift_fsb;
-	uint			resblks;
-
-	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
-
-	if (direction == SHIFT_LEFT) {
-		/*
-		 * Reserve blocks to cover potential extent merges after left
-		 * shift operations.
-		 */
-		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-		next_fsb = XFS_B_TO_FSB(mp, offset + len);
-		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
-	} else {
-		/*
-		 * If right shift, delegate the work of initialization of
-		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
-		 */
-		resblks = 0;
-		next_fsb = NULLFSBLOCK;
-		stop_fsb = XFS_B_TO_FSB(mp, offset);
-	}
-
-	shift_fsb = XFS_B_TO_FSB(mp, len);
 
 	/*
 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
@@ -1451,8 +1283,7 @@ xfs_shift_file_space(
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
 	 */
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-					     offset, -1);
+	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
 	if (error)
 		return error;
 	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
@@ -1472,16 +1303,50 @@ xfs_shift_file_space(
 			return error;
 	}
 
-	/*
-	 * The extent shifting code works on extent granularity. So, if
-	 * stop_fsb is not the starting block of extent, we need to split
-	 * the extent at stop_fsb.
-	 */
-	if (direction == SHIFT_RIGHT) {
-		error = xfs_bmap_split_extent(ip, stop_fsb);
-		if (error)
-			return error;
-	}
+	return 0;
+}
+
+/*
+ * xfs_collapse_file_space()
+ *	This routine frees disk space and shifts extents for the given file.
+ *	First we free the data blocks in the specified range by calling
+ *	xfs_free_file_space(), which also syncs dirty data and invalidates
+ *	the page cache over the region being collapsed; then we shift the
+ *	extent records to the left to cover the hole.
+ * RETURNS:
+ *	0 on success
+ *	errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+	struct xfs_defer_ops	dfops;
+	xfs_fsblock_t		first_block;
+	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
+	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
+	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
+	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	bool			done = false;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
+
+	trace_xfs_collapse_file_space(ip);
+
+	error = xfs_free_file_space(ip, offset, len);
+	if (error)
+		return error;
+
+	error = xfs_prepare_shift(ip, offset);
+	if (error)
+		return error;
 
 	while (!error && !done) {
 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
@@ -1495,25 +1360,17 @@ xfs_shift_file_space(
 				XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto out_trans_cancel;
-
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 		xfs_defer_init(&dfops, &first_block);
-
-		/*
-		 * We are using the write transaction in which max 2 bmbt
-		 * updates are allowed
-		 */
-		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, stop_fsb, &first_block, &dfops,
-				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
+		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
+				&done, stop_fsb, &first_block, &dfops);
 		if (error)
 			goto out_bmap_cancel;
 
 		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_bmap_cancel;
-
 		error = xfs_trans_commit(tp);
 	}
 
@@ -1526,36 +1383,6 @@ out_trans_cancel:
 	return error;
 }
 
-/*
- * xfs_collapse_file_space()
- *	This routine frees disk space and shift extent for the given file.
- *	The first thing we do is to free data blocks in the specified range
- *	by calling xfs_free_file_space(). It would also sync dirty data
- *	and invalidate page cache over the region on which collapse range
- *	is working. And Shift extent records to the left to cover a hole.
- * RETURNS:
- *	0 on success
- *	errno on error
- *
- */
-int
-xfs_collapse_file_space(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_off_t		len)
-{
-	int error;
-
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-	trace_xfs_collapse_file_space(ip);
-
-	error = xfs_free_file_space(ip, offset, len);
-	if (error)
-		return error;
-
-	return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
-}
-
 /*
  * xfs_insert_file_space()
  *	This routine creates hole space by shifting extents for the given file.
@@ -1574,10 +1401,60 @@ xfs_insert_file_space(
 	loff_t			offset,
 	loff_t			len)
 {
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+	struct xfs_defer_ops	dfops;
+	xfs_fsblock_t		first_block;
+	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
+	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
+	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
+	bool			done = false;
+
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
+
 	trace_xfs_insert_file_space(ip);
 
-	return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
+	error = xfs_prepare_shift(ip, offset);
+	if (error)
+		return error;
+
+	/*
+	 * The extent shifting code works on extent granularity. So, if stop_fsb
+	 * is not the starting block of an extent, we need to split the extent at
+	 * stop_fsb.
+	 */
+	error = xfs_bmap_split_extent(ip, stop_fsb);
+	if (error)
+		return error;
+
+	while (!error && !done) {
+		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
+					&tp);
+		if (error)
+			break;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+		xfs_defer_init(&dfops, &first_block);
+		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
+				&done, stop_fsb, &first_block, &dfops);
+		if (error)
+			goto out_bmap_cancel;
+
+		error = xfs_defer_finish(&tp, &dfops);
+		if (error)
+			goto out_bmap_cancel;
+		error = xfs_trans_commit(tp);
+	}
+
+	return error;
+
+out_bmap_cancel:
+	xfs_defer_cancel(&dfops);
+	xfs_trans_cancel(tp);
+	return error;
 }
 
 /*
@@ -1832,7 +1709,6 @@ xfs_swap_extent_forks(
 	xfs_filblks_t		aforkblks = 0;
 	xfs_filblks_t		taforkblks = 0;
 	xfs_extnum_t		junk;
-	xfs_extnum_t		nextents;
 	uint64_t		tmp;
 	int			error;
 
@@ -1907,13 +1783,6 @@ xfs_swap_extent_forks(
 
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		/*
-		 * If the extents fit in the inode, fix the pointer.  Otherwise
-		 * it's already NULL or pointing to the extent.
-		 */
-		nextents = xfs_iext_count(&ip->i_df);
-		if (nextents <= XFS_INLINE_EXTS)
-			ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
 		(*src_log_flags) |= XFS_ILOG_DEXT;
 		break;
 	case XFS_DINODE_FMT_BTREE:
@@ -1925,13 +1794,6 @@ xfs_swap_extent_forks(
 
 	switch (tip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
-		/*
-		 * If the extents fit in the inode, fix the pointer.  Otherwise
-		 * it's already NULL or pointing to the extent.
-		 */
-		nextents = xfs_iext_count(&tip->i_df);
-		if (nextents <= XFS_INLINE_EXTS)
-			tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
 		(*target_log_flags) |= XFS_ILOG_DEXT;
 		break;
 	case XFS_DINODE_FMT_BTREE:
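
The rewritten getbmap loop emits one getbmapx record per shared/unshared piece of a mapping: xfs_reflink_trim_around_shared() trims the record to the next transition, the trimmed piece is reported, and xfs_getbmap_next_rec() advances past it until the original extent is consumed. The advance step, as a standalone sketch (simplified types; the kernel skips the startblock bump for delalloc/hole pseudo-blocks):

	struct irec {
		unsigned long long	startoff;	/* file offset, in blocks */
		unsigned long long	startblock;	/* disk block, or pseudo-value */
		unsigned long long	blockcount;
	};

	/* Advance past the piece just reported; 0 once the extent is consumed. */
	static int next_rec(struct irec *r, unsigned long long total_end)
	{
		unsigned long long end = r->startoff + r->blockcount;

		if (end == total_end)
			return 0;
		r->startoff = end;
		r->startblock += r->blockcount;
		r->blockcount = total_end - end;
		return 1;
	}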

+ 7 - 3
fs/xfs/xfs_bmap_util.h

@@ -47,10 +47,14 @@ int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
 int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
 		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
 
-/* bmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *);
+struct kgetbmap {
+	__s64		bmv_offset;	/* file offset of segment in blocks */
+	__s64		bmv_block;	/* starting block (64-bit daddr_t)  */
+	__s64		bmv_length;	/* length of segment, blocks	    */
+	__s32		bmv_oflags;	/* output flags */
+};
 int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
-		xfs_bmap_format_t formatter, void *arg);
+		struct kgetbmap *out);
 
 /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
 int	xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,

+ 16 - 0
fs/xfs/xfs_buf.c

@@ -42,6 +42,8 @@
 #include "xfs_mount.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
 
 static kmem_zone_t *xfs_buf_zone;
 
@@ -2129,3 +2131,17 @@ xfs_buf_terminate(void)
 {
 	kmem_zone_destroy(xfs_buf_zone);
 }
+
+void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
+{
+	/*
+	 * Set the lru reference count to 0 based on the error injection tag.
+	 * This allows userspace to disrupt buffer caching for debug/testing
+	 * purposes.
+	 */
+	if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
+			   XFS_ERRTAG_BUF_LRU_REF))
+		lru_ref = 0;
+
+	atomic_set(&bp->b_lru_ref, lru_ref);
+}

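Moving xfs_buf_set_ref() out of line lets the new buf_lru_ref error tag force b_lru_ref to zero, so released buffers drop out of the cache immediately. A hedged sketch of arming the knob from userspace; the sysfs path follows the layout of the existing errortag files and "sda1" is a placeholder device name.

#include <stdio.h>

int main(void)
{
	/* Writing 1 makes the tag fire every time it is tested. */
	FILE *f = fopen("/sys/fs/xfs/sda1/errortag/buf_lru_ref", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "1\n");
	fclose(f);
	return 0;
}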
+ 1 - 4
fs/xfs/xfs_buf.h

@@ -352,10 +352,7 @@ extern void xfs_buf_terminate(void);
 #define XFS_BUF_ADDR(bp)		((bp)->b_maps[0].bm_bn)
 #define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
 
-static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
-{
-	atomic_set(&bp->b_lru_ref, lru_ref);
-}
+void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref);
 
 static inline int xfs_buf_ispinned(struct xfs_buf *bp)
 {

+ 5 - 5
fs/xfs/xfs_dir2_readdir.c

@@ -41,7 +41,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
 	DT_FIFO, DT_SOCK, DT_LNK, DT_WHT,
 };
 
-static unsigned char
+unsigned char
 xfs_dir3_get_dtype(
 	struct xfs_mount	*mp,
 	uint8_t			filetype)
@@ -266,7 +266,7 @@ xfs_dir2_leaf_readbuf(
 	xfs_dablk_t		next_ra;
 	xfs_dablk_t		map_off;
 	xfs_dablk_t		last_da;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	int			ra_want;
 	int			error = 0;
 
@@ -283,7 +283,7 @@ xfs_dir2_leaf_readbuf(
 	 */
 	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
 	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
-	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map))
+	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
 		goto out;
 	if (map.br_startoff >= last_da)
 		goto out;
@@ -311,7 +311,7 @@ xfs_dir2_leaf_readbuf(
 	if (next_ra >= last_da)
 		goto out_no_ra;
 	if (map.br_blockcount < geo->fsbcount &&
-	    !xfs_iext_get_extent(ifp, ++idx, &map))
+	    !xfs_iext_next_extent(ifp, &icur, &map))
 		goto out_no_ra;
 	if (map.br_startoff >= last_da)
 		goto out_no_ra;
@@ -334,7 +334,7 @@ xfs_dir2_leaf_readbuf(
 			ra_want -= geo->fsbcount;
 			next_ra += geo->fsbcount;
 		}
-		if (!xfs_iext_get_extent(ifp, ++idx, &map)) {
+		if (!xfs_iext_next_extent(ifp, &icur, &map)) {
 			*ra_blk = last_da;
 			break;
 		}

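The xfs_dir2_leaf_readbuf() hunks above show the series-wide conversion pattern: the raw extent index and its ++idx arithmetic become an opaque xfs_iext_cursor advanced with xfs_iext_next_extent(). A sketch of the idiom as a standalone helper; process() is a placeholder, and the caller is assumed to hold the inode lock.

static void
walk_data_fork(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	bool			found;

	/* Position the cursor at the first mapping at or after start_fsb. */
	for (found = xfs_iext_lookup_extent(ip, ifp, start_fsb, &icur, &got);
	     found;
	     found = xfs_iext_next_extent(ifp, &icur, &got))
		process(&got);	/* one xfs_bmbt_irec per mapping */
}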
+ 2 - 19
fs/xfs/xfs_dquot.c

@@ -53,13 +53,6 @@
  * otherwise by the lowest id first, see xfs_dqlock2.
  */
 
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
 struct kmem_zone		*xfs_qm_dqtrxzone;
 static struct kmem_zone		*xfs_qm_dqzone;
 
@@ -703,7 +696,7 @@ xfs_dq_get_next_id(
 	xfs_dqid_t		next_id = *id + 1; /* simple advance */
 	uint			lock_flags;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	cur;
 	xfs_fsblock_t		start;
 	int			error = 0;
 
@@ -727,7 +720,7 @@ xfs_dq_get_next_id(
 			return error;
 	}
 
-	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) {
+	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
 		/* contiguous chunk, bump startoff for the id calculation */
 		if (got.br_startoff < start)
 			got.br_startoff = start;
@@ -770,15 +763,6 @@ xfs_qm_dqget(
 		return -ESRCH;
 	}
 
-#ifdef DEBUG
-	if (xfs_do_dqerror) {
-		if ((xfs_dqerror_target == mp->m_ddev_targp) &&
-		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-			xfs_debug(mp, "Returning error in dqget");
-			return -EIO;
-		}
-	}
-
 	ASSERT(type == XFS_DQ_USER ||
 	       type == XFS_DQ_PROJ ||
 	       type == XFS_DQ_GROUP);
@@ -786,7 +770,6 @@ xfs_qm_dqget(
 		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 		ASSERT(xfs_inode_dquot(ip, type) == NULL);
 	}
-#endif
 
 restart:
 	mutex_lock(&qi->qi_tree_lock);

+ 5 - 1
fs/xfs/xfs_error.c

@@ -21,6 +21,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_sysfs.h"
 
@@ -58,6 +59,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_DROP_WRITES,
 	XFS_RANDOM_LOG_BAD_CRC,
 	XFS_RANDOM_LOG_ITEM_PIN,
+	XFS_RANDOM_BUF_LRU_REF,
 };
 
 struct xfs_errortag_attr {
@@ -163,6 +165,7 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
 XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
 XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
 XFS_ERRORTAG_ATTR_RW(log_item_pin,	XFS_ERRTAG_LOG_ITEM_PIN);
+XFS_ERRORTAG_ATTR_RW(buf_lru_ref,	XFS_ERRTAG_BUF_LRU_REF);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -196,10 +199,11 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(drop_writes),
 	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
 	XFS_ERRORTAG_ATTR_LIST(log_item_pin),
+	XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
 	NULL,
 };
 
-struct kobj_type xfs_errortag_ktype = {
+static struct kobj_type xfs_errortag_ktype = {
 	.release = xfs_sysfs_release,
 	.sysfs_ops = &xfs_errortag_sysfs_ops,
 	.default_attrs = xfs_errortag_attrs,

+ 0 - 81
fs/xfs/xfs_error.h

@@ -63,87 +63,6 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 		} \
 	}
 
-/*
- * error injection tags - the labels can be anything you want
- * but each tag should have its own unique number
- */
-
-#define XFS_ERRTAG_NOERROR				0
-#define XFS_ERRTAG_IFLUSH_1				1
-#define XFS_ERRTAG_IFLUSH_2				2
-#define XFS_ERRTAG_IFLUSH_3				3
-#define XFS_ERRTAG_IFLUSH_4				4
-#define XFS_ERRTAG_IFLUSH_5				5
-#define XFS_ERRTAG_IFLUSH_6				6
-#define	XFS_ERRTAG_DA_READ_BUF				7
-#define	XFS_ERRTAG_BTREE_CHECK_LBLOCK			8
-#define	XFS_ERRTAG_BTREE_CHECK_SBLOCK			9
-#define	XFS_ERRTAG_ALLOC_READ_AGF			10
-#define	XFS_ERRTAG_IALLOC_READ_AGI			11
-#define	XFS_ERRTAG_ITOBP_INOTOBP			12
-#define	XFS_ERRTAG_IUNLINK				13
-#define	XFS_ERRTAG_IUNLINK_REMOVE			14
-#define	XFS_ERRTAG_DIR_INO_VALIDATE			15
-#define XFS_ERRTAG_BULKSTAT_READ_CHUNK			16
-#define XFS_ERRTAG_IODONE_IOERR				17
-#define XFS_ERRTAG_STRATREAD_IOERR			18
-#define XFS_ERRTAG_STRATCMPL_IOERR			19
-#define XFS_ERRTAG_DIOWRITE_IOERR			20
-#define XFS_ERRTAG_BMAPIFORMAT				21
-#define XFS_ERRTAG_FREE_EXTENT				22
-#define XFS_ERRTAG_RMAP_FINISH_ONE			23
-#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE		24
-#define XFS_ERRTAG_REFCOUNT_FINISH_ONE			25
-#define XFS_ERRTAG_BMAP_FINISH_ONE			26
-#define XFS_ERRTAG_AG_RESV_CRITICAL			27
-/*
- * DEBUG mode instrumentation to test and/or trigger delayed allocation
- * block killing in the event of failed writes. When enabled, all
- * buffered writes are silently dropped and handled as if they failed.
- * All delalloc blocks in the range of the write (including pre-existing
- * delalloc blocks!) are tossed as part of the write failure error
- * handling sequence.
- */
-#define XFS_ERRTAG_DROP_WRITES				28
-#define XFS_ERRTAG_LOG_BAD_CRC				29
-#define XFS_ERRTAG_LOG_ITEM_PIN				30
-#define XFS_ERRTAG_MAX					31
-
-/*
- * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
- */
-#define XFS_RANDOM_DEFAULT				100
-#define XFS_RANDOM_IFLUSH_1				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_2				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_3				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_4				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_5				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IFLUSH_6				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_DA_READ_BUF				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_BTREE_CHECK_LBLOCK			(XFS_RANDOM_DEFAULT/4)
-#define XFS_RANDOM_BTREE_CHECK_SBLOCK			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_ALLOC_READ_AGF			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IALLOC_READ_AGI			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_ITOBP_INOTOBP			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IUNLINK				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IUNLINK_REMOVE			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_DIR_INO_VALIDATE			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_BULKSTAT_READ_CHUNK			XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_IODONE_IOERR				(XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_STRATREAD_IOERR			(XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_STRATCMPL_IOERR			(XFS_RANDOM_DEFAULT/10)
-#define XFS_RANDOM_DIOWRITE_IOERR			(XFS_RANDOM_DEFAULT/10)
-#define	XFS_RANDOM_BMAPIFORMAT				XFS_RANDOM_DEFAULT
-#define XFS_RANDOM_FREE_EXTENT				1
-#define XFS_RANDOM_RMAP_FINISH_ONE			1
-#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE		1
-#define XFS_RANDOM_REFCOUNT_FINISH_ONE			1
-#define XFS_RANDOM_BMAP_FINISH_ONE			1
-#define XFS_RANDOM_AG_RESV_CRITICAL			4
-#define XFS_RANDOM_DROP_WRITES				1
-#define XFS_RANDOM_LOG_BAD_CRC				1
-#define XFS_RANDOM_LOG_ITEM_PIN				1
-
 #ifdef DEBUG
 extern int xfs_errortag_init(struct xfs_mount *mp);
 extern void xfs_errortag_del(struct xfs_mount *mp);

+ 1 - 1
fs/xfs/xfs_file.c

@@ -984,7 +984,7 @@ xfs_file_readdir(
 	 * point we can change the ->readdir prototype to include the
 	 * buffer size.  For now we use the current glibc buffer size.
 	 */
-	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);
 
 	return xfs_readdir(NULL, ip, ctx, bufsize);
 }

+ 1 - 1
fs/xfs/xfs_icache.c

@@ -610,7 +610,7 @@ again:
 	} else {
 		rcu_read_unlock();
 		if (flags & XFS_IGET_INCORE) {
-			error = -ENOENT;
+			error = -ENODATA;
 			goto out_error_or_again;
 		}
 		XFS_STATS_INC(mp, xs_ig_missed);

+ 6 - 27
fs/xfs/xfs_inode.c

@@ -39,6 +39,7 @@
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_filestream.h"
@@ -384,14 +385,6 @@ xfs_isilocked(
 }
 #endif
 
-#ifdef DEBUG
-int xfs_locked_n;
-int xfs_small_retries;
-int xfs_middle_retries;
-int xfs_lots_retries;
-int xfs_lock_delays;
-#endif
-
 /*
  * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
  * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
@@ -544,24 +537,11 @@ again:
 
 		if ((attempts % 5) == 0) {
 			delay(1); /* Don't just spin the CPU */
-#ifdef DEBUG
-			xfs_lock_delays++;
-#endif
 		}
 		i = 0;
 		try_lock = 0;
 		goto again;
 	}
-
-#ifdef DEBUG
-	if (attempts) {
-		if (attempts < 5) xfs_small_retries++;
-		else if (attempts < 100) xfs_middle_retries++;
-		else xfs_lots_retries++;
-	} else {
-		xfs_locked_n++;
-	}
-#endif
 }
 
 /*
@@ -767,7 +747,7 @@ xfs_ialloc(
 	xfs_inode_t	*pip,
 	umode_t		mode,
 	xfs_nlink_t	nlink,
-	xfs_dev_t	rdev,
+	dev_t		rdev,
 	prid_t		prid,
 	int		okalloc,
 	xfs_buf_t	**ialloc_context,
@@ -819,6 +799,7 @@ xfs_ialloc(
 	set_nlink(inode, nlink);
 	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
 	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
+	inode->i_rdev = rdev;
 	xfs_set_projid(ip, prid);
 
 	if (pip && XFS_INHERIT_GID(pip)) {
@@ -867,7 +848,6 @@ xfs_ialloc(
 	case S_IFBLK:
 	case S_IFSOCK:
 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
-		ip->i_df.if_u2.if_rdev = rdev;
 		ip->i_df.if_flags = 0;
 		flags |= XFS_ILOG_DEV;
 		break;
@@ -933,7 +913,7 @@ xfs_ialloc(
 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 		ip->i_df.if_flags = XFS_IFEXTENTS;
 		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
-		ip->i_df.if_u1.if_extents = NULL;
+		ip->i_df.if_u1.if_root = NULL;
 		break;
 	default:
 		ASSERT(0);
@@ -975,7 +955,7 @@ xfs_dir_ialloc(
 					   the inode. */
 	umode_t		mode,
 	xfs_nlink_t	nlink,
-	xfs_dev_t	rdev,
+	dev_t		rdev,
 	prid_t		prid,		/* project id */
 	int		okalloc,	/* ok to allocate new space */
 	xfs_inode_t	**ipp,		/* pointer to inode; it will be
@@ -1147,7 +1127,7 @@ xfs_create(
 	xfs_inode_t		*dp,
 	struct xfs_name		*name,
 	umode_t			mode,
-	xfs_dev_t		rdev,
+	dev_t			rdev,
 	xfs_inode_t		**ipp)
 {
 	int			is_dir = S_ISDIR(mode);
@@ -1183,7 +1163,6 @@ xfs_create(
 		return error;
 
 	if (is_dir) {
-		rdev = 0;
 		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
 		tres = &M_RES(mp)->tr_mkdir;
 	} else {

+ 2 - 2
fs/xfs/xfs_inode.h

@@ -391,7 +391,7 @@ void		xfs_inactive(struct xfs_inode *ip);
 int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 			   struct xfs_inode **ipp, struct xfs_name *ci_name);
 int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
-			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+			   umode_t mode, dev_t rdev, struct xfs_inode **ipp);
 int		xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
 			   umode_t mode, struct xfs_inode **ipp);
 int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -428,7 +428,7 @@ xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-			       xfs_nlink_t, xfs_dev_t, prid_t, int,
+			       xfs_nlink_t, dev_t, prid_t, int,
 			       struct xfs_inode **, int *);
 
 /* from xfs_file.c */

+ 7 - 22
fs/xfs/xfs_inode_item.c

@@ -72,7 +72,6 @@ xfs_inode_item_data_fork_size(
 		break;
 
 	case XFS_DINODE_FMT_DEV:
-	case XFS_DINODE_FMT_UUID:
 		break;
 	default:
 		ASSERT(0);
@@ -156,15 +155,13 @@ xfs_inode_item_format_data_fork(
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		iip->ili_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
 
 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
 		    ip->i_d.di_nextents > 0 &&
 		    ip->i_df.if_bytes > 0) {
 			struct xfs_bmbt_rec *p;
 
-			ASSERT(ip->i_df.if_u1.if_extents != NULL);
 			ASSERT(xfs_iext_count(&ip->i_df) > 0);
 
 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
@@ -181,8 +178,7 @@ xfs_inode_item_format_data_fork(
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		iip->ili_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
 
 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
 		    ip->i_df.if_broot_bytes > 0) {
@@ -200,8 +196,7 @@ xfs_inode_item_format_data_fork(
 		break;
 	case XFS_DINODE_FMT_LOCAL:
 		iip->ili_fields &=
-			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEV | XFS_ILOG_UUID);
+			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 		    ip->i_df.if_bytes > 0) {
 			/*
@@ -224,17 +219,9 @@ xfs_inode_item_format_data_fork(
 		break;
 	case XFS_DINODE_FMT_DEV:
 		iip->ili_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
+			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
 		if (iip->ili_fields & XFS_ILOG_DEV)
-			ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev;
-		break;
-	case XFS_DINODE_FMT_UUID:
-		iip->ili_fields &=
-			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
-		if (iip->ili_fields & XFS_ILOG_UUID)
-			ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;
+			ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
 		break;
 	default:
 		ASSERT(0);
@@ -264,7 +251,6 @@ xfs_inode_item_format_attr_fork(
 
 			ASSERT(xfs_iext_count(ip->i_afp) ==
 				ip->i_d.di_anextents);
-			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
 
 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
 			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
@@ -441,7 +427,7 @@ xfs_inode_item_format(
 	ilf->ilf_dsize = 0;
 	ilf->ilf_asize = 0;
 	ilf->ilf_pad = 0;
-	uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null);
+	memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
 
 	xlog_finish_iovec(lv, vecp, sizeof(*ilf));
 
@@ -892,8 +878,7 @@ xfs_inode_item_format_convert(
 	in_f->ilf_asize = in_f32->ilf_asize;
 	in_f->ilf_dsize = in_f32->ilf_dsize;
 	in_f->ilf_ino = in_f32->ilf_ino;
-	/* copy biggest field of ilf_u */
-	uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
+	memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
 	in_f->ilf_blkno = in_f32->ilf_blkno;
 	in_f->ilf_len = in_f32->ilf_len;
 	in_f->ilf_boffset = in_f32->ilf_boffset;

+ 1 - 1
fs/xfs/xfs_inode_item.h

@@ -48,7 +48,7 @@ extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
 extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
 extern void xfs_iflush_abort(struct xfs_inode *, bool);
 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
-					 xfs_inode_log_format_t *);
+					 struct xfs_inode_log_format *);
 
 extern struct kmem_zone	*xfs_ili_zone;
 

+ 93 - 65
fs/xfs/xfs_ioctl.c

@@ -44,6 +44,7 @@
 #include "xfs_btree.h"
 #include <linux/fsmap.h>
 #include "xfs_fsmap.h"
+#include "scrub/xfs_scrub.h"
 
 #include <linux/capability.h>
 #include <linux/cred.h>
@@ -310,8 +311,8 @@ xfs_readlink_by_handle(
 int
 xfs_set_dmattrs(
 	xfs_inode_t     *ip,
-	u_int		evmask,
-	u_int16_t	state)
+	uint		evmask,
+	uint16_t	state)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_trans_t	*tp;
@@ -1201,6 +1202,8 @@ out_unlock:
  * 8. for non-realtime files, the extent size hint must be limited
  *    to half the AG size to avoid alignment extending the extent beyond the
  *    limits of the AG.
+ *
+ * Please keep this function in sync with xfs_scrub_inode_extsize.
  */
 static int
 xfs_ioctl_setattr_check_extsize(
@@ -1257,6 +1260,8 @@ xfs_ioctl_setattr_check_extsize(
  * 5. Extent size must be a multiple of the appropriate block size.
  * 6. The extent size hint must be limited to half the AG size to avoid
  *    alignment extending the extent beyond the limits of the AG.
+ *
+ * Please keep this function in sync with xfs_scrub_inode_cowextsize.
  */
 static int
 xfs_ioctl_setattr_check_cowextsize(
@@ -1540,17 +1545,26 @@ out_drop_write:
 	return error;
 }
 
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv)
+static bool
+xfs_getbmap_format(
+	struct kgetbmap		*p,
+	struct getbmapx __user	*u,
+	size_t			recsize)
 {
-	struct getbmap __user	*base = (struct getbmap __user *)*ap;
-
-	/* copy only getbmap portion (not getbmapx) */
-	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-		return -EFAULT;
-
-	*ap += sizeof(struct getbmap);
-	return 0;
+	if (put_user(p->bmv_offset, &u->bmv_offset) ||
+	    put_user(p->bmv_block, &u->bmv_block) ||
+	    put_user(p->bmv_length, &u->bmv_length) ||
+	    put_user(0, &u->bmv_count) ||
+	    put_user(0, &u->bmv_entries))
+		return false;
+	if (recsize < sizeof(struct getbmapx))
+		return true;
+	if (put_user(0, &u->bmv_iflags) ||
+	    put_user(p->bmv_oflags, &u->bmv_oflags) ||
+	    put_user(0, &u->bmv_unused1) ||
+	    put_user(0, &u->bmv_unused2))
+		return false;
+	return true;
 }
 
 STATIC int
@@ -1560,68 +1574,57 @@ xfs_ioc_getbmap(
 	void			__user *arg)
 {
 	struct getbmapx		bmx = { 0 };
-	int			error;
-
-	/* struct getbmap is a strict subset of struct getbmapx. */
-	if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
-		return -EFAULT;
+	struct kgetbmap		*buf;
+	size_t			recsize;
+	int			error, i;
 
-	if (bmx.bmv_count < 2)
+	switch (cmd) {
+	case XFS_IOC_GETBMAPA:
+		bmx.bmv_iflags = BMV_IF_ATTRFORK;
+		/*FALLTHRU*/
+	case XFS_IOC_GETBMAP:
+		if (file->f_mode & FMODE_NOCMTIME)
+			bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+		/* struct getbmap is a strict subset of struct getbmapx. */
+		recsize = sizeof(struct getbmap);
+		break;
+	case XFS_IOC_GETBMAPX:
+		recsize = sizeof(struct getbmapx);
+		break;
+	default:
 		return -EINVAL;
+	}
 
-	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-	if (file->f_mode & FMODE_NOCMTIME)
-		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
-			    (__force struct getbmap *)arg+1);
-	if (error)
-		return error;
-
-	/* copy back header - only size of getbmap */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-		return -EFAULT;
-	return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
-{
-	struct getbmapx __user	*base = (struct getbmapx __user *)*ap;
-
-	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-		return -EFAULT;
-
-	*ap += sizeof(struct getbmapx);
-	return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-	struct xfs_inode	*ip,
-	void			__user *arg)
-{
-	struct getbmapx		bmx;
-	int			error;
-
-	if (copy_from_user(&bmx, arg, sizeof(bmx)))
+	if (copy_from_user(&bmx, arg, recsize))
 		return -EFAULT;
 
 	if (bmx.bmv_count < 2)
 		return -EINVAL;
+	if (bmx.bmv_count > ULONG_MAX / recsize)
+		return -ENOMEM;
 
-	if (bmx.bmv_iflags & (~BMV_IF_VALID))
-		return -EINVAL;
+	buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0);
+	if (!buf)
+		return -ENOMEM;
 
-	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-			    (__force struct getbmapx *)arg+1);
+	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, buf);
 	if (error)
-		return error;
+		goto out_free_buf;
 
-	/* copy back header */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-		return -EFAULT;
+	error = -EFAULT;
+	if (copy_to_user(arg, &bmx, recsize))
+		goto out_free_buf;
+	arg += recsize;
+
+	for (i = 0; i < bmx.bmv_entries; i++) {
+		if (!xfs_getbmap_format(buf + i, arg, recsize))
+			goto out_free_buf;
+		arg += recsize;
+	}
 
+	error = 0;
+out_free_buf:
+	kmem_free(buf);
 	return error;
 }
 
@@ -1703,6 +1706,30 @@ xfs_ioc_getfsmap(
 	return 0;
 }
 
+STATIC int
+xfs_ioc_scrub_metadata(
+	struct xfs_inode		*ip,
+	void				__user *arg)
+{
+	struct xfs_scrub_metadata	scrub;
+	int				error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&scrub, arg, sizeof(scrub)))
+		return -EFAULT;
+
+	error = xfs_scrub_metadata(ip, &scrub);
+	if (error)
+		return error;
+
+	if (copy_to_user(arg, &scrub, sizeof(scrub)))
+		return -EFAULT;
+
+	return 0;
+}
+
 int
 xfs_ioc_swapext(
 	xfs_swapext_t	*sxp)
@@ -1878,14 +1905,15 @@ xfs_file_ioctl(
 
 	case XFS_IOC_GETBMAP:
 	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(filp, cmd, arg);
-
 	case XFS_IOC_GETBMAPX:
-		return xfs_ioc_getbmapx(ip, arg);
+		return xfs_ioc_getbmap(filp, cmd, arg);
 
 	case FS_IOC_GETFSMAP:
 		return xfs_ioc_getfsmap(ip, arg);
 
+	case XFS_IOC_SCRUB_METADATA:
+		return xfs_ioc_scrub_metadata(ip, arg);
+
 	case XFS_IOC_FD_TO_HANDLE:
 	case XFS_IOC_PATH_TO_HANDLE:
 	case XFS_IOC_PATH_TO_FSHANDLE: {

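For reference, the unified handler above is what a GETBMAPX caller now hits: the kernel fills a kgetbmap array and copies records out one at a time via put_user(). A minimal userspace sketch, assuming the xfsprogs uapi headers are installed; the <xfs/xfs.h> include path and the 32-record batch size are assumptions.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>		/* XFS_IOC_GETBMAPX, struct getbmapx */

int main(int argc, char **argv)
{
	struct getbmapx *bm = calloc(33, sizeof(*bm));
	int fd, i;

	if (argc != 2 || !bm)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	bm[0].bmv_length = -1LL;	/* map the whole file */
	bm[0].bmv_count = 33;		/* header plus 32 records */
	if (ioctl(fd, XFS_IOC_GETBMAPX, bm) < 0) {
		perror("XFS_IOC_GETBMAPX");
		return 1;
	}
	for (i = 1; i <= bm[0].bmv_entries; i++)
		printf("offset %lld block %lld length %lld oflags 0x%x\n",
		       (long long)bm[i].bmv_offset,
		       (long long)bm[i].bmv_block,
		       (long long)bm[i].bmv_length, bm[i].bmv_oflags);
	return 0;
}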
+ 2 - 2
fs/xfs/xfs_ioctl.h

@@ -86,7 +86,7 @@ xfs_file_compat_ioctl(
 extern int
 xfs_set_dmattrs(
 	struct xfs_inode	*ip,
-	u_int			evmask,
-	u_int16_t		state);
+	uint			evmask,
+	uint16_t		state);
 
 #endif

+ 1 - 0
fs/xfs/xfs_ioctl32.c

@@ -556,6 +556,7 @@ xfs_file_compat_ioctl(
 	case XFS_IOC_ERROR_INJECTION:
 	case XFS_IOC_ERROR_CLEARALL:
 	case FS_IOC_GETFSMAP:
+	case XFS_IOC_SCRUB_METADATA:
 		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */

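The scrub ioctl is experimental and gated on CAP_SYS_ADMIN, as xfs_ioc_scrub_metadata() above shows. A hedged sketch of invoking it from userspace; the struct layout and the XFS_SCRUB_TYPE_PROBE constant come from the uapi header this series adds, so treat both as assumptions when building against other headers.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* XFS_IOC_SCRUB_METADATA, struct xfs_scrub_metadata */

int main(int argc, char **argv)
{
	struct xfs_scrub_metadata sm;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&sm, 0, sizeof(sm));
	sm.sm_type = XFS_SCRUB_TYPE_PROBE;	/* cheapest check: is scrub alive? */
	if (ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm) < 0)
		perror("XFS_IOC_SCRUB_METADATA");
	else
		printf("scrub output flags: 0x%x\n", sm.sm_flags);
	return 0;
}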
+ 9 - 6
fs/xfs/xfs_iomap.c

@@ -30,6 +30,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
@@ -389,7 +390,7 @@ xfs_iomap_prealloc_size(
 	struct xfs_inode	*ip,
 	loff_t			offset,
 	loff_t			count,
-	xfs_extnum_t		idx)
+	struct xfs_iext_cursor	*icur)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
@@ -414,7 +415,7 @@ xfs_iomap_prealloc_size(
 	 */
 	if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
 	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
-	    !xfs_iext_get_extent(ifp, idx - 1, &prev) ||
+	    !xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
 	    prev.br_startoff + prev.br_blockcount < offset_fsb)
 		return mp->m_writeio_blocks;
 
@@ -532,7 +533,7 @@ xfs_file_iomap_begin_delay(
 	xfs_fileoff_t		end_fsb;
 	int			error = 0, eof = 0;
 	struct xfs_bmbt_irec	got;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 	xfs_fsblock_t		prealloc_blocks = 0;
 
 	ASSERT(!XFS_IS_REALTIME_INODE(ip));
@@ -557,7 +558,7 @@ xfs_file_iomap_begin_delay(
 			goto out_unlock;
 	}
 
-	eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+	eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
 	if (!eof && got.br_startoff <= offset_fsb) {
 		if (xfs_is_reflink_inode(ip)) {
 			bool		shared;
@@ -591,7 +592,8 @@ xfs_file_iomap_begin_delay(
 	end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
 
 	if (eof) {
-		prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx);
+		prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
+				&icur);
 		if (prealloc_blocks) {
 			xfs_extlen_t	align;
 			xfs_off_t	end_offset;
@@ -613,7 +615,8 @@ xfs_file_iomap_begin_delay(
 
 retry:
 	error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
-			end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof);
+			end_fsb - offset_fsb, prealloc_blocks, &got, &icur,
+			eof);
 	switch (error) {
 	case 0:
 		break;

+ 21 - 31
fs/xfs/xfs_iops.c

@@ -160,7 +160,6 @@ xfs_generic_create(
 	if (S_ISCHR(mode) || S_ISBLK(mode)) {
 		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 			return -EINVAL;
-		rdev = sysv_encode_dev(rdev);
 	} else {
 		rdev = 0;
 	}
@@ -535,8 +534,7 @@ xfs_vn_getattr(
 	case S_IFBLK:
 	case S_IFCHR:
 		stat->blksize = BLKDEV_IOSIZE;
-		stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-				   sysv_minor(ip->i_df.if_u2.if_rdev));
+		stat->rdev = inode->i_rdev;
 		break;
 	default:
 		if (XFS_IS_REALTIME_INODE(ip)) {
@@ -885,22 +883,6 @@ xfs_setattr_size(
 	if (error)
 		return error;
 
-	/*
-	 * We are going to log the inode size change in this transaction so
-	 * any previous writes that are beyond the on disk EOF and the new
-	 * EOF that have not been written out need to be written here.  If we
-	 * do not write the data out, we expose ourselves to the null files
-	 * problem. Note that this includes any block zeroing we did above;
-	 * otherwise those blocks may not be zeroed after a crash.
-	 */
-	if (did_zeroing ||
-	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
-		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-						      ip->i_d.di_size, newsize);
-		if (error)
-			return error;
-	}
-
 	/*
 	 * We've already locked out new page faults, so now we can safely remove
 	 * pages from the page cache knowing they won't get refaulted until we
@@ -917,9 +899,29 @@ xfs_setattr_size(
 	 * user visible changes). There's not much we can do about this, except
 	 * to hope that the caller sees ENOMEM and retries the truncate
 	 * operation.
+	 *
+	 * And we update in-core i_size and truncate page cache beyond newsize
+	 * before writing back the [di_size, newsize] range, so we're guaranteed
+	 * not to write stale data past the new EOF on truncate down.
 	 */
 	truncate_setsize(inode, newsize);
 
+	/*
+	 * We are going to log the inode size change in this transaction so
+	 * any previous writes that are beyond the on disk EOF and the new
+	 * EOF that have not been written out need to be written here.  If we
+	 * do not write the data out, we expose ourselves to the null files
+	 * problem. Note that this includes any block zeroing we did above;
+	 * otherwise those blocks may not be zeroed after a crash.
+	 */
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+						ip->i_d.di_size, newsize - 1);
+		if (error)
+			return error;
+	}
+
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error)
 		return error;
@@ -1231,18 +1233,6 @@ xfs_setup_inode(
 	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
 	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
 
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		inode->i_rdev =
-			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-			      sysv_minor(ip->i_df.if_u2.if_rdev));
-		break;
-	default:
-		inode->i_rdev = 0;
-		break;
-	}
-
 	i_size_write(inode, ip->i_d.di_size);
 	xfs_diflags_to_iflags(inode, ip);
 

+ 1 - 12
fs/xfs/xfs_itable.c

@@ -31,16 +31,6 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 
-int
-xfs_internal_inum(
-	xfs_mount_t	*mp,
-	xfs_ino_t	ino)
-{
-	return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-		(xfs_sb_version_hasquota(&mp->m_sb) &&
-		 xfs_is_quota_inode(&mp->m_sb, ino)));
-}
-
 /*
  * Return stat information for one inode.
  * Return 0 if ok, else errno.
@@ -119,12 +109,11 @@ xfs_bulkstat_one_int(
 
 	switch (dic->di_format) {
 	case XFS_DINODE_FMT_DEV:
-		buf->bs_rdev = ip->i_df.if_u2.if_rdev;
+		buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
 		buf->bs_blksize = BLKDEV_IOSIZE;
 		buf->bs_blocks = 0;
 		break;
 	case XFS_DINODE_FMT_LOCAL:
-	case XFS_DINODE_FMT_UUID:
 		buf->bs_rdev = 0;
 		buf->bs_blksize = mp->m_sb.sb_blocksize;
 		buf->bs_blocks = 0;

+ 0 - 2
fs/xfs/xfs_itable.h

@@ -96,6 +96,4 @@ xfs_inumbers(
 	void			__user *buffer, /* buffer with inode info */
 	inumbers_fmt_pf		formatter);
 
-int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
-
 #endif	/* __XFS_ITABLE_H__ */

+ 9 - 12
fs/xfs/xfs_linux.h

@@ -142,6 +142,13 @@ typedef __u32			xfs_nlink_t;
 #define SYNCHRONIZE()	barrier()
 #define __return_address __builtin_return_address(0)
 
+/*
+ * Return the address of a label.  Use barrier() so that the optimizer
+ * won't reorder code to refactor the error jumpouts into a single
+ * return, which throws off the reported address.
+ */
+#define __this_address	({ __label__ __here; __here: barrier(); &&__here; })
+
 #define XFS_PROJID_DEFAULT	0
 
 #define MIN(a,b)	(min(a,b))
@@ -243,10 +250,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 #define ASSERT(expr)	\
 	(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
 #else	/* !DEBUG */
 
 #ifdef XFS_WARN
@@ -254,21 +257,15 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 #define ASSERT(expr)	\
 	(likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
 
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
 #else	/* !DEBUG && !XFS_WARN */
 
 #define ASSERT(expr)	((void)0)
 
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
 #endif /* XFS_WARN */
 #endif /* DEBUG */
 
+#define STATIC static noinline
+
 #ifdef CONFIG_XFS_RT
 
 /*

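The new __this_address macro returns a distinct code address from each failure jumpout, which later verifier patches can report instead of a bare boolean. A standalone userspace sketch of the same labels-as-values trick; __asm__("") stands in for the kernel's barrier(), and the magic/version checks are invented for illustration.

#include <stdio.h>

/* Same construction as the kernel macro, with a userspace barrier. */
#define __this_address	({ __label__ __here; __here: __asm__(""); &&__here; })

static void *check_header(unsigned int magic, unsigned int version)
{
	if (magic != 0x58465342)	/* "XFSB" */
		return __this_address;	/* each jumpout has its own address */
	if (version < 4)
		return __this_address;
	return NULL;			/* all checks passed */
}

int main(void)
{
	printf("bad magic fails at %p\n", check_header(0, 5));
	printf("bad version fails at %p\n", check_header(0x58465342, 1));
	return 0;
}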
+ 31 - 2
fs/xfs/xfs_log.c

@@ -22,6 +22,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
@@ -608,6 +609,7 @@ xfs_log_mount(
 	xfs_daddr_t	blk_offset,
 	int		num_bblks)
 {
+	bool		fatal = xfs_sb_version_hascrc(&mp->m_sb);
 	int		error = 0;
 	int		min_logfsbs;
 
@@ -659,9 +661,20 @@ xfs_log_mount(
 			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
 			 XFS_MAX_LOG_BYTES);
 		error = -EINVAL;
+	} else if (mp->m_sb.sb_logsunit > 1 &&
+		   mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
+		xfs_warn(mp,
+		"log stripe unit %u bytes must be a multiple of block size",
+			 mp->m_sb.sb_logsunit);
+		error = -EINVAL;
+		fatal = true;
 	}
 	if (error) {
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		/*
+		 * Log check errors are always fatal on v5; or whenever bad
+		 * metadata leads to a crash.
+		 */
+		if (fatal) {
 			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
 			ASSERT(0);
 			goto out_free_log;
@@ -744,6 +757,7 @@ xfs_log_mount_finish(
 {
 	int	error = 0;
 	bool	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
+	bool	recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;
 
 	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
 		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -780,6 +794,21 @@ xfs_log_mount_finish(
 	mp->m_super->s_flags &= ~MS_ACTIVE;
 	evict_inodes(mp->m_super);
 
+	/*
+	 * Drain the buffer LRU after log recovery. This is required for v4
+	 * filesystems to avoid leaving around buffers with NULL verifier ops,
+	 * but we do it unconditionally to make sure we're always in a clean
+	 * cache state after mount.
+	 *
+	 * Don't push in the error case because the AIL may have pending intents
+	 * that aren't removed until recovery is cancelled.
+	 */
+	if (!error && recovered) {
+		xfs_log_force(mp, XFS_LOG_SYNC);
+		xfs_ail_push_all_sync(mp->m_ail);
+	}
+	xfs_wait_buftarg(mp->m_ddev_targp);
+
 	if (readonly)
 		mp->m_flags |= XFS_MOUNT_RDONLY;
 
@@ -3734,7 +3763,7 @@ xlog_ticket_alloc(
  * one of the iclogs.  This uses backup pointers stored in a different
  * part of the log in case we trash the log structure.
  */
-void
+STATIC void
 xlog_verify_dest_ptr(
 	struct xlog	*log,
 	void		*ptr)

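The added xfs_log_mount() check rejects a log stripe unit that is not a multiple of the block size, treating it as fatal on v5 filesystems or whenever the bad geometry could crash the kernel. A standalone restatement of the check with worked values; the 4096-byte block size is just an example.

#include <stdbool.h>
#include <stdio.h>

static bool logsunit_valid(unsigned int logsunit, unsigned int blocksize)
{
	/* 0 or 1 means "no stripe unit"; otherwise require block alignment. */
	return logsunit <= 1 || (logsunit % blocksize) == 0;
}

int main(void)
{
	printf("%d\n", logsunit_valid(32768, 4096));	/* 1: eight blocks */
	printf("%d\n", logsunit_valid(10000, 4096));	/* 0: rejected */
	return 0;
}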
+ 32 - 30
fs/xfs/xfs_log_recover.c

@@ -85,17 +85,21 @@ struct xfs_buf_cancel {
  */
 
 /*
- * Verify the given count of basic blocks is valid number of blocks
- * to specify for an operation involving the given XFS log buffer.
- * Returns nonzero if the count is valid, 0 otherwise.
+ * Verify the log-relative block number and length in basic blocks are valid for
+ * an operation involving the given XFS log buffer. Returns true if the fields
+ * are valid, false otherwise.
  */
-
-static inline int
-xlog_buf_bbcount_valid(
+static inline bool
+xlog_verify_bp(
 	struct xlog	*log,
+	xfs_daddr_t	blk_no,
 	int		bbcount)
 {
-	return bbcount > 0 && bbcount <= log->l_logBBsize;
+	if (blk_no < 0 || blk_no >= log->l_logBBsize)
+		return false;
+	if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize)
+		return false;
+	return true;
 }
 
 /*
@@ -110,7 +114,11 @@ xlog_get_bp(
 {
 	struct xfs_buf	*bp;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
+	/*
+	 * Pass log block 0 since we don't have an address yet; the buffer
+	 * will be verified on read.
+	 */
+	if (!xlog_verify_bp(log, 0, nbblks)) {
 		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
 			nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -180,9 +188,10 @@ xlog_bread_noalign(
 {
 	int		error;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
-			nbblks);
+	if (!xlog_verify_bp(log, blk_no, nbblks)) {
+		xfs_warn(log->l_mp,
+			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
+			 blk_no, nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return -EFSCORRUPTED;
 	}
@@ -265,9 +274,10 @@ xlog_bwrite(
 {
 	int		error;
 
-	if (!xlog_buf_bbcount_valid(log, nbblks)) {
-		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
-			nbblks);
+	if (!xlog_verify_bp(log, blk_no, nbblks)) {
+		xfs_warn(log->l_mp,
+			 "Invalid log block/length (0x%llx, 0x%x) for buffer",
+			 blk_no, nbblks);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
 		return -EFSCORRUPTED;
 	}
@@ -753,7 +763,7 @@ xlog_find_head(
 	 * in the in-core log.  The following number can be made tighter if
 	 * we actually look at the block size of the filesystem.
 	 */
-	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+	num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
 	if (head_blk >= num_scan_bblks) {
 		/*
 		 * We are guaranteed that the entire check can be performed
@@ -2975,7 +2985,7 @@ xlog_recover_inode_pass2(
 	struct xlog_recover_item	*item,
 	xfs_lsn_t			current_lsn)
 {
-	xfs_inode_log_format_t	*in_f;
+	struct xfs_inode_log_format	*in_f;
 	xfs_mount_t		*mp = log->l_mp;
 	xfs_buf_t		*bp;
 	xfs_dinode_t		*dip;
@@ -2989,10 +2999,10 @@ xlog_recover_inode_pass2(
 	uint			isize;
 	int			need_free = 0;
 
-	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
 	} else {
-		in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
+		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
 		need_free = 1;
 		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
 		if (error)
@@ -3163,16 +3173,8 @@ xlog_recover_inode_pass2(
 	}
 
 	fields = in_f->ilf_fields;
-	switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
-	case XFS_ILOG_DEV:
+	if (fields & XFS_ILOG_DEV)
 		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
-		break;
-	case XFS_ILOG_UUID:
-		memcpy(XFS_DFORK_DPTR(dip),
-		       &in_f->ilf_u.ilfu_uuid,
-		       sizeof(uuid_t));
-		break;
-	}
 
 	if (in_f->ilf_size == 2)
 		goto out_owner_change;
@@ -4297,7 +4299,7 @@ xlog_recover_add_to_trans(
 	char			*dp,
 	int			len)
 {
-	xfs_inode_log_format_t	*in_f;			/* any will do */
+	struct xfs_inode_log_format	*in_f;			/* any will do */
 	xlog_recover_item_t	*item;
 	char			*ptr;
 
@@ -4331,7 +4333,7 @@ xlog_recover_add_to_trans(
 
 	ptr = kmem_alloc(len, KM_SLEEP);
 	memcpy(ptr, dp, len);
-	in_f = (xfs_inode_log_format_t *)ptr;
+	in_f = (struct xfs_inode_log_format *)ptr;
 
 	/* take the tail entry */
 	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
@@ -5823,7 +5825,7 @@ xlog_recover_cancel(
  * Read all of the agf and agi counters and check that they
  * are consistent with the superblock counters.
  */
-void
+STATIC void
 xlog_recover_check_summary(
 	struct xlog	*log)
 {

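xlog_verify_bp() now validates the starting block as well as the length, closing the underflowed-block-number cases that previously made log recovery fail in odd ways. A standalone restatement with worked values showing what the old length-only check missed.

#include <stdbool.h>
#include <stdio.h>

typedef long long xfs_daddr_t;	/* stand-in for the kernel typedef */

static bool xlog_verify_bp(int logBBsize, xfs_daddr_t blk_no, int bbcount)
{
	if (blk_no < 0 || blk_no >= logBBsize)
		return false;
	if (bbcount <= 0 || blk_no + bbcount > logBBsize)
		return false;
	return true;
}

int main(void)
{
	int size = 8192;	/* log size in basic blocks */

	printf("%d\n", xlog_verify_bp(size, 0, 64));	/* 1: in range */
	printf("%d\n", xlog_verify_bp(size, 8190, 64));	/* 0: runs off the end */
	printf("%d\n", xlog_verify_bp(size, -2, 64));	/* 0: underflowed blk_no */
	return 0;
}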
+ 13 - 2
fs/xfs/xfs_mount.c

@@ -1022,10 +1022,21 @@ xfs_mountfs(
 	xfs_rtunmount_inodes(mp);
  out_rele_rip:
 	IRELE(rip);
-	cancel_delayed_work_sync(&mp->m_reclaim_work);
-	xfs_reclaim_inodes(mp, SYNC_WAIT);
 	/* Clean out dquots that might be in memory after quotacheck. */
 	xfs_qm_unmount(mp);
+	/*
+	 * Cancel all delayed reclaim work and reclaim the inodes directly.
+	 * We have to do this /after/ rtunmount and qm_unmount because those
+	 * two will have scheduled delayed reclaim for the rt/quota inodes.
+	 *
+	 * This is slightly different from the unmountfs call sequence
+	 * because we could be tearing down a partially set up mount.  In
+	 * particular, if log_mount_finish fails we bail out without calling
+	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
+	 * quota inodes.
+	 */
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	xfs_reclaim_inodes(mp, SYNC_WAIT);
  out_log_dealloc:
 	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
 	xfs_log_mount_cancel(mp);

+ 52 - 56
fs/xfs/xfs_reflink.c

@@ -273,7 +273,7 @@ xfs_reflink_reserve_cow(
 	struct xfs_bmbt_irec	got;
 	int			error = 0;
 	bool			eof = false, trimmed;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 	/*
 	 * Search the COW fork extent list first.  This serves two purposes:
@@ -284,7 +284,7 @@ xfs_reflink_reserve_cow(
 	 * tree.
 	 */
 
-	if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got))
 		eof = true;
 	if (!eof && got.br_startoff <= imap->br_startoff) {
 		trace_xfs_reflink_cow_found(ip, imap);
@@ -312,7 +312,7 @@ xfs_reflink_reserve_cow(
 		return error;
 
 	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
-			imap->br_blockcount, 0, &got, &idx, eof);
+			imap->br_blockcount, 0, &got, &icur, eof);
 	if (error == -ENOSPC || error == -EDQUOT)
 		trace_xfs_reflink_cow_enospc(ip, imap);
 	if (error)
@@ -353,29 +353,22 @@ xfs_reflink_convert_cow(
 	xfs_off_t		offset,
 	xfs_off_t		count)
 {
-	struct xfs_bmbt_irec	got;
-	struct xfs_defer_ops	dfops;
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
-	xfs_extnum_t		idx;
-	bool			found;
-	int			error = 0;
+	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
+	struct xfs_bmbt_irec	imap;
+	struct xfs_defer_ops	dfops;
+	xfs_fsblock_t		first_block = NULLFSBLOCK;
+	int			nimaps = 1, error = 0;
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	ASSERT(count != 0);
 
-	/* Convert all the extents to real from unwritten. */
-	for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
-	     found && got.br_startoff < end_fsb;
-	     found = xfs_iext_get_extent(ifp, ++idx, &got)) {
-		error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
-				end_fsb - offset_fsb, &dfops);
-		if (error)
-			break;
-	}
-
-	/* Finish up. */
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
+			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
+			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
+			&dfops);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
@@ -399,7 +392,7 @@ xfs_reflink_allocate_cow(
 	bool			trimmed;
 	xfs_filblks_t		resaligned;
 	xfs_extlen_t		resblks = 0;
-	xfs_extnum_t		idx;
+	struct xfs_iext_cursor	icur;
 
 retry:
 	ASSERT(xfs_is_reflink_inode(ip));
@@ -409,7 +402,7 @@ retry:
 	 * Even if the extent is not shared we might have a preallocation for
 	 * it in the COW fork.  If so use it.
 	 */
-	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&
 	    got.br_startoff <= offset_fsb) {
 		*shared = true;
 
@@ -496,13 +489,13 @@ xfs_reflink_find_cow_mapping(
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	xfs_fileoff_t			offset_fsb;
 	struct xfs_bmbt_irec		got;
-	xfs_extnum_t			idx;
+	struct xfs_iext_cursor		icur;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
 	ASSERT(xfs_is_reflink_inode(ip));
 
 	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return false;
 	if (got.br_startoff > offset_fsb)
 		return false;
@@ -524,18 +517,18 @@ xfs_reflink_trim_irec_to_next_cow(
 {
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec		got;
-	xfs_extnum_t			idx;
+	struct xfs_iext_cursor		icur;
 
 	if (!xfs_is_reflink_inode(ip))
 		return;
 
 	/* Find the extent in the CoW fork. */
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
 		return;
 
 	/* This is the extent before; try sliding up one. */
 	if (got.br_startoff < offset_fsb) {
-		if (!xfs_iext_get_extent(ifp, idx + 1, &got))
+		if (!xfs_iext_next_extent(ifp, &icur, &got))
 			return;
 	}
 
@@ -562,24 +555,32 @@ xfs_reflink_cancel_cow_blocks(
 {
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec		got, del;
-	xfs_extnum_t			idx;
+	struct xfs_iext_cursor		icur;
 	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
 	int				error = 0;
 
 	if (!xfs_is_reflink_inode(ip))
 		return 0;
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
 		return 0;
 
-	while (got.br_startoff < end_fsb) {
+	/* Walk backwards until we're out of the I/O range... */
+	while (got.br_startoff + got.br_blockcount > offset_fsb) {
 		del = got;
 		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+		/* Extent delete may have bumped ext forward */
+		if (!del.br_blockcount) {
+			xfs_iext_prev(ifp, &icur);
+			goto next_extent;
+		}
+
 		trace_xfs_reflink_cancel_cow(ip, &del);
 
 		if (isnullstartblock(del.br_startblock)) {
 			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
-					&idx, &got, &del);
+					&icur, &got, &del);
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
@@ -610,10 +611,10 @@ xfs_reflink_cancel_cow_blocks(
 			}
 
 			/* Remove the mapping from the CoW fork. */
-			xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 		}
-
-		if (!xfs_iext_get_extent(ifp, ++idx, &got))
+next_extent:
+		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 	}
 
@@ -698,7 +699,7 @@ xfs_reflink_end_cow(
 	int				error;
 	unsigned int			resblks;
 	xfs_filblks_t			rlen;
-	xfs_extnum_t			idx;
+	struct xfs_iext_cursor		icur;
 
 	trace_xfs_reflink_end_cow(ip, offset, count);
 
@@ -733,27 +734,22 @@ xfs_reflink_end_cow(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	/* If there is a hole at end_fsb - 1 go to the previous extent */
-	if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
-	    got.br_startoff > end_fsb) {
-		/*
-		 * In case of racing, overlapping AIO writes no COW extents
-		 * might be left by the time I/O completes for the loser of
-		 * the race.  In that case we are done.
-		 */
-		if (idx <= 0)
-			goto out_cancel;
-		xfs_iext_get_extent(ifp, --idx, &got);
-	}
+	/*
+	 * In case of racing, overlapping AIO writes no COW extents might be
+	 * left by the time I/O completes for the loser of the race.  In that
+	 * case we are done.
+	 */
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+		goto out_cancel;
 
 	/* Walk backwards until we're out of the I/O range... */
 	while (got.br_startoff + got.br_blockcount > offset_fsb) {
 		del = got;
 		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
 
-		/* Extent delete may have bumped idx forward */
+		/* Extent delete may have bumped ext forward */
 		if (!del.br_blockcount) {
-			idx--;
+			xfs_iext_prev(ifp, &icur);
 			goto next_extent;
 		}
 
@@ -765,7 +761,7 @@ xfs_reflink_end_cow(
 		 * allocated but have not yet been involved in a write.
 		 */
 		if (got.br_state == XFS_EXT_UNWRITTEN) {
-			idx--;
+			xfs_iext_prev(ifp, &icur);
 			goto next_extent;
 		}
 
@@ -796,14 +792,14 @@ xfs_reflink_end_cow(
 			goto out_defer;
 
 		/* Remove the mapping from the CoW fork. */
-		xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
 		xfs_defer_ijoin(&dfops, ip);
 		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
 next_extent:
-		if (!xfs_iext_get_extent(ifp, idx, &got))
+		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 	}
 
@@ -1433,7 +1429,7 @@ xfs_reflink_inode_has_shared_extents(
 	xfs_extlen_t			aglen;
 	xfs_agblock_t			rbno;
 	xfs_extlen_t			rlen;
-	xfs_extnum_t			idx;
+	struct xfs_iext_cursor		icur;
 	bool				found;
 	int				error;
 
@@ -1445,7 +1441,7 @@ xfs_reflink_inode_has_shared_extents(
 	}
 
 	*has_shared = false;
-	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+	found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
 	while (found) {
 		if (isnullstartblock(got.br_startblock) ||
 		    got.br_state != XFS_EXT_NORM)
@@ -1464,7 +1460,7 @@ xfs_reflink_inode_has_shared_extents(
 			return 0;
 		}
 next:
-		found = xfs_iext_get_extent(ifp, ++idx, &got);
+		found = xfs_iext_next_extent(ifp, &icur, &got);
 	}
 
 	return 0;

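xfs_reflink_cancel_cow_blocks() and xfs_reflink_end_cow() now share the same backward walk: position with xfs_iext_lookup_extent_before(), then step back with xfs_iext_prev() whenever a deletion moved the cursor. A sketch of that idiom as a standalone helper; locking, transactions, and the actual extent surgery are elided, and handle() is a placeholder.

static int
walk_cow_range_backwards(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got, del;

	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		return 0;	/* no mapping at or before end_fsb */

	while (got.br_startoff + got.br_blockcount > offset_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);

		/* A prior delete may have bumped the cursor forward. */
		if (!del.br_blockcount) {
			xfs_iext_prev(ifp, &icur);
			goto next_extent;
		}
		handle(ip, &icur, &got, &del);
next_extent:
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}
	return 0;
}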
+ 2 - 0
fs/xfs/xfs_rtalloc.h

@@ -138,6 +138,7 @@ int xfs_rtalloc_query_range(struct xfs_trans *tp,
 int xfs_rtalloc_query_all(struct xfs_trans *tp,
 			  xfs_rtalloc_query_range_fn fn,
 			  void *priv);
+bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
 #else
 # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)    (ENOSYS)
 # define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
@@ -146,6 +147,7 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
 # define xfs_rtalloc_query_range(t,l,h,f,p)             (ENOSYS)
 # define xfs_rtalloc_query_all(t,f,p)                   (ENOSYS)
 # define xfs_rtbuf_get(m,t,b,i,p)                       (ENOSYS)
+# define xfs_verify_rtbno(m, r)			(false)
 static inline int		/* error */
 xfs_rtmount_init(
 	xfs_mount_t	*mp)	/* file system mount structure */

+ 15 - 49
fs/xfs/xfs_trace.h

@@ -218,53 +218,15 @@ TRACE_EVENT(xfs_attr_list_node_descend,
 		   __entry->bt_before)
 );
 
-TRACE_EVENT(xfs_iext_insert,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-		 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-	TP_ARGS(ip, idx, r, state, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
-		__field(xfs_fileoff_t, startoff)
-		__field(xfs_fsblock_t, startblock)
-		__field(xfs_filblks_t, blockcount)
-		__field(xfs_exntst_t, state)
-		__field(int, bmap_state)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
-		__entry->startoff = r->br_startoff;
-		__entry->startblock = r->br_startblock;
-		__entry->blockcount = r->br_blockcount;
-		__entry->state = r->br_state;
-		__entry->bmap_state = state;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-		  "offset %lld block %lld count %lld flag %d caller %ps",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
-		  __entry->startoff,
-		  (int64_t)__entry->startblock,
-		  __entry->blockcount,
-		  __entry->state,
-		  (char *)__entry->caller_ip)
-);
-
 DECLARE_EVENT_CLASS(xfs_bmap_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+	TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state,
 		 unsigned long caller_ip),
-	TP_ARGS(ip, idx, state, caller_ip),
+	TP_ARGS(ip, cur, state, caller_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
+		__field(void *, leaf)
+		__field(int, pos)
 		__field(xfs_fileoff_t, startoff)
 		__field(xfs_fsblock_t, startblock)
 		__field(xfs_filblks_t, blockcount)
@@ -277,10 +239,11 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		struct xfs_bmbt_irec	r;
 
 		ifp = xfs_iext_state_to_fork(ip, state);
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+		xfs_iext_get_extent(ifp, cur, &r);
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
 		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
+		__entry->leaf = cur->leaf;
+		__entry->pos = cur->pos;
 		__entry->startoff = r.br_startoff;
 		__entry->startblock = r.br_startblock;
 		__entry->blockcount = r.br_blockcount;
@@ -288,12 +251,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		__entry->bmap_state = state;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+	TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d "
 		  "offset %lld block %lld count %lld flag %d caller %ps",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
+		  __entry->leaf,
+		  __entry->pos,
 		  __entry->startoff,
 		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
@@ -303,13 +267,15 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 
 #define DEFINE_BMAP_EVENT(name) \
 DEFINE_EVENT(xfs_bmap_class, name, \
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+	TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, \
 		 unsigned long caller_ip), \
-	TP_ARGS(ip, idx, state, caller_ip))
+	TP_ARGS(ip, cur, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_insert);
 DEFINE_BMAP_EVENT(xfs_iext_remove);
 DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
 DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
+DEFINE_BMAP_EVENT(xfs_read_extent);
+DEFINE_BMAP_EVENT(xfs_write_extent);
 
 DECLARE_EVENT_CLASS(xfs_buf_class,
 	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),

+ 19 - 3
fs/xfs/xfs_trans_ail.c

@@ -25,6 +25,7 @@
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_trace.h"
+#include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_log.h"
 
@@ -514,11 +515,26 @@ xfsaild(
 	current->flags |= PF_MEMALLOC;
 	set_freezable();
 
-	while (!kthread_should_stop()) {
+	while (1) {
 		if (tout && tout <= 20)
-			__set_current_state(TASK_KILLABLE);
+			set_current_state(TASK_KILLABLE);
 		else
-			__set_current_state(TASK_INTERRUPTIBLE);
+			set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * Check kthread_should_stop() after we set the task state
+		 * to guarantee that we either see the stop bit and exit or
+		 * the task state is reset to runnable such that it's not
+		 * scheduled out indefinitely and detects the stop bit at
+		 * next iteration.
+		 *
+		 * A memory barrier is included in the above task state set to
+		 * serialize against kthread_stop().
+		 */
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
 
 		spin_lock(&ailp->xa_lock);
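The xfsaild change above is an instance of the canonical stop-check ordering for kthreads: set the task state first, then test kthread_should_stop(), so the barrier implied by the state change pairs with the one in kthread_stop(). The general pattern as a kernel-style sketch; my_work_pending() and do_work() are placeholders.

#include <linux/kthread.h>
#include <linux/sched.h>

/* Placeholders for whatever work the thread actually performs. */
extern bool my_work_pending(void *data);
extern void do_work(void *data);

static int example_thread(void *data)
{
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		/*
		 * Either we observe the stop bit here, or kthread_stop()
		 * observes our old state and wakes us, so the thread can
		 * never be parked indefinitely with a stop pending.
		 */
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}

		if (!my_work_pending(data)) {
			schedule();
			continue;
		}

		__set_current_state(TASK_RUNNING);
		do_work(data);
	}
	return 0;
}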