
Merge tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs update from Dave Chinner:
 "This update contains:
   - various cleanups
   - log recovery debug hooks
   - seek hole/data implementation merge
   - extent shift rework to fix collapse range bugs
   - various sparse warning fixes
   - log recovery transaction processing rework to fix use after free
     bugs
   - metadata buffer IO infrastructure rework to ensure all buffers
     under IO have valid reference counts
   - various fixes for ondisk flags, writeback and zero range corner
     cases"

* tag 'xfs-for-linus-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (56 commits)
  xfs: fix agno increment in xfs_inumbers() loop
  xfs: xfs_iflush_done checks the wrong log item callback
  xfs: flush the range before zero range conversion
  xfs: restore buffer_head unwritten bit on ioend cancel
  xfs: check for null dquot in xfs_quota_calc_throttle()
  xfs: fix crc field handling in xfs_sb_to/from_disk
  xfs: don't send null bp to xfs_trans_brelse()
  xfs: check for inode size overflow in xfs_new_eof()
  xfs: only set extent size hint when asked
  xfs: project id inheritance is a directory only flag
  xfs: kill time.h
  xfs: compat_xfs_bstat does not have forkoff
  xfs: simplify xfs_zero_remaining_bytes
  xfs: check xfs_buf_read_uncached returns correctly
  xfs: introduce xfs_buf_submit[_wait]
  xfs: kill xfs_bioerror_relse
  xfs: xfs_bioerror can die.
  xfs: kill xfs_bdstrat_cb
  xfs: rework xfs_buf_bio_endio error handling
  xfs: xfs_buf_ioend and xfs_buf_iodone_work duplicate functionality
  ...
Linus Torvalds authored 11 years ago · commit 5ff0b9e1a1

+ 0 - 1
fs/xfs/kmem.c

@@ -21,7 +21,6 @@
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include "time.h"
 #include "kmem.h"
 #include "xfs_message.h"
 

+ 4 - 0
fs/xfs/libxfs/xfs_alloc.c

@@ -2209,6 +2209,10 @@ xfs_agf_verify(
 	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
 		return false;
 
+	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't

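The new AGF checks follow the standard verifier pattern: reject on-disk values that could otherwise be used to index fixed-size in-core state (the btree cursor carries per-level arrays sized to XFS_BTREE_MAXLEVELS). A minimal standalone sketch of that pattern, with hypothetical names and a stand-in constant:

	#include <stdbool.h>
	#include <stdint.h>

	#define MAXLEVELS	8	/* stand-in for XFS_BTREE_MAXLEVELS */

	/* A level count read from disk must be bounded before it is ever
	 * used as a per-level array index. */
	static bool levels_ok(uint32_t bno_level, uint32_t cnt_level)
	{
		return bno_level <= MAXLEVELS && cnt_level <= MAXLEVELS;
	}
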
+ 241 - 124
fs/xfs/libxfs/xfs_bmap.c

@@ -5403,23 +5403,224 @@ error0:
 	return error;
 }
 
+/*
+ * Determine whether an extent shift can be accomplished by a merge with the
+ * extent that precedes the target hole of the shift.
+ */
+STATIC bool
+xfs_bmse_can_merge(
+	struct xfs_bmbt_irec	*left,	/* preceding extent */
+	struct xfs_bmbt_irec	*got,	/* current extent to shift */
+	xfs_fileoff_t		shift)	/* shift fsb */
+{
+	xfs_fileoff_t		startoff;
+
+	startoff = got->br_startoff - shift;
+
+	/*
+	 * The extent, once shifted, must be adjacent in-file and on-disk with
+	 * the preceding extent.
+	 */
+	if ((left->br_startoff + left->br_blockcount != startoff) ||
+	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
+	    (left->br_state != got->br_state) ||
+	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
+		return false;
+
+	return true;
+}
+
+/*
+ * A bmap extent shift adjusts the file offset of an extent to fill a preceding
+ * hole in the file. If an extent shift would result in the extent being fully
+ * adjacent to the extent that currently precedes the hole, we can merge with
+ * the preceding extent rather than do the shift.
+ *
+ * This function assumes the caller has verified a shift-by-merge is possible
+ * with the provided extents via xfs_bmse_can_merge().
+ */
+STATIC int
+xfs_bmse_merge(
+	struct xfs_inode		*ip,
+	int				whichfork,
+	xfs_fileoff_t			shift,		/* shift fsb */
+	int				current_ext,	/* idx of gotp */
+	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
+	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
+	struct xfs_btree_cur		*cur,
+	int				*logflags)	/* output */
+{
+	struct xfs_ifork		*ifp;
+	struct xfs_bmbt_irec		got;
+	struct xfs_bmbt_irec		left;
+	xfs_filblks_t			blockcount;
+	int				error, i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	xfs_bmbt_get_all(gotp, &got);
+	xfs_bmbt_get_all(leftp, &left);
+	blockcount = left.br_blockcount + got.br_blockcount;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(xfs_bmse_can_merge(&left, &got, shift));
+
+	/*
+	 * Merge the in-core extents. Note that the host record pointers and
+	 * current_ext index are invalid once the extent has been removed via
+	 * xfs_iext_remove().
+	 */
+	xfs_bmbt_set_blockcount(leftp, blockcount);
+	xfs_iext_remove(ip, current_ext, 1, 0);
+
+	/*
+	 * Update the on-disk extent count, the btree if necessary and log the
+	 * inode.
+	 */
+	XFS_IFORK_NEXT_SET(ip, whichfork,
+			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+	*logflags |= XFS_ILOG_CORE;
+	if (!cur) {
+		*logflags |= XFS_ILOG_DEXT;
+		return 0;
+	}
+
+	/* lookup and remove the extent to merge */
+	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+				   got.br_blockcount, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	error = xfs_btree_delete(cur, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	/* lookup and update size of the previous extent */
+	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
+				   left.br_blockcount, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	left.br_blockcount = blockcount;
+
+	error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
+				left.br_blockcount, left.br_state);
+	if (error)
+		goto out_error;
+
+	return 0;
+
+out_error:
+	return error;
+}
+
+/*
+ * Shift a single extent.
+ */
+STATIC int
+xfs_bmse_shift_one(
+	struct xfs_inode		*ip,
+	int				whichfork,
+	xfs_fileoff_t			offset_shift_fsb,
+	int				*current_ext,
+	struct xfs_bmbt_rec_host	*gotp,
+	struct xfs_btree_cur		*cur,
+	int				*logflags)
+{
+	struct xfs_ifork		*ifp;
+	xfs_fileoff_t			startoff;
+	struct xfs_bmbt_rec_host	*leftp;
+	struct xfs_bmbt_irec		got;
+	struct xfs_bmbt_irec		left;
+	int				error;
+	int				i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+
+	xfs_bmbt_get_all(gotp, &got);
+	startoff = got.br_startoff - offset_shift_fsb;
+
+	/* delalloc extents should be prevented by caller */
+	XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock),
+				out_error);
+
+	/*
+	 * If this is the first extent in the file, make sure there's enough
+	 * room at the start of the file and jump right to the shift as there's
+	 * no left extent to merge.
+	 */
+	if (*current_ext == 0) {
+		if (got.br_startoff < offset_shift_fsb)
+			return -EINVAL;
+		goto shift_extent;
+	}
+
+	/* grab the left extent and check for a large enough hole */
+	leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
+	xfs_bmbt_get_all(leftp, &left);
+
+	if (startoff < left.br_startoff + left.br_blockcount)
+		return -EINVAL;
+
+	/* check whether to merge the extent or shift it down */
+	if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb))
+		goto shift_extent;
+
+	return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext,
+			      gotp, leftp, cur, logflags);
+
+shift_extent:
+	/*
+	 * Increment the extent index for the next iteration, update the start
+	 * offset of the in-core extent and update the btree if applicable.
+	 */
+	(*current_ext)++;
+	xfs_bmbt_set_startoff(gotp, startoff);
+	*logflags |= XFS_ILOG_CORE;
+	if (!cur) {
+		*logflags |= XFS_ILOG_DEXT;
+		return 0;
+	}
+
+	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
+				   got.br_blockcount, &i);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, out_error);
+
+	got.br_startoff = startoff;
+	error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
+				got.br_blockcount, got.br_state);
+	if (error)
+		return error;
+
+	return 0;
+
+out_error:
+	return error;
+}
+
 /*
  * Shift extent records to the left to cover a hole.
  *
- * The maximum number of extents to be shifted in a single operation
- * is @num_exts, and @current_ext keeps track of the current extent
- * index we have shifted. @offset_shift_fsb is the length by which each
- * extent is shifted. If there is no hole to shift the extents
- * into, this will be considered invalid operation and we abort immediately.
+ * The maximum number of extents to be shifted in a single operation is
+ * @num_exts. @start_fsb specifies the file offset to start the shift and the
+ * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
+ * is the length by which each extent is shifted. If there is no hole to shift
+ * the extents into, this will be considered invalid operation and we abort
+ * immediately.
  */
 int
 xfs_bmap_shift_extents(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	int			*done,
 	xfs_fileoff_t		start_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
-	xfs_extnum_t		*current_ext,
+	int			*done,
+	xfs_fileoff_t		*next_fsb,
 	xfs_fsblock_t		*firstblock,
 	struct xfs_bmap_free	*flist,
 	int			num_exts)
@@ -5427,16 +5628,13 @@ xfs_bmap_shift_extents(
 	struct xfs_btree_cur		*cur = NULL;
 	struct xfs_bmbt_rec_host	*gotp;
 	struct xfs_bmbt_irec            got;
-	struct xfs_bmbt_irec		left;
 	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_ifork		*ifp;
 	xfs_extnum_t			nexts = 0;
-	xfs_fileoff_t			startoff;
+	xfs_extnum_t			current_ext;
 	int				error = 0;
-	int				i;
 	int				whichfork = XFS_DATA_FORK;
 	int				logflags = 0;
-	xfs_filblks_t			blockcount = 0;
 	int				total_extents;
 
 	if (unlikely(XFS_TEST_ERROR(
@@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	ASSERT(current_ext != NULL);
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5461,23 +5660,6 @@ xfs_bmap_shift_extents(
 			return error;
 	}
 
-	/*
-	 * If *current_ext is 0, we would need to lookup the extent
-	 * from where we would start shifting and store it in gotp.
-	 */
-	if (!*current_ext) {
-		gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
-		/*
-		 * gotp can be null in 2 cases: 1) if there are no extents
-		 * or 2) start_fsb lies in a hole beyond which there are
-		 * no extents. Either way, we are done.
-		 */
-		if (!gotp) {
-			*done = 1;
-			return 0;
-		}
-	}
-
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
@@ -5485,113 +5667,47 @@ xfs_bmap_shift_extents(
 		cur->bc_private.b.flags = 0;
 	}
 
+	/*
+	 * Look up the extent index for the fsb where we start shifting. We can
+	 * henceforth iterate with current_ext as extent list changes are locked
+	 * out via ilock.
+	 *
+	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
+	 * start_fsb lies in a hole beyond which there are no extents. Either
+	 * way, we are done.
+	 */
+	gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
+	if (!gotp) {
+		*done = 1;
+		goto del_cursor;
+	}
+
 	/*
 	 * There may be delalloc extents in the data fork before the range we
-	 * are collapsing out, so we cannot
-	 * use the count of real extents here. Instead we have to calculate it
-	 * from the incore fork.
+	 * are collapsing out, so we cannot use the count of real extents here.
+	 * Instead we have to calculate it from the incore fork.
 	 */
 	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-	while (nexts++ < num_exts && *current_ext < total_extents) {
-
-		gotp = xfs_iext_get_ext(ifp, *current_ext);
-		xfs_bmbt_get_all(gotp, &got);
-		startoff = got.br_startoff - offset_shift_fsb;
-
-		/*
-		 * Before shifting extent into hole, make sure that the hole
-		 * is large enough to accomodate the shift.
-		 */
-		if (*current_ext) {
-			xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-						*current_ext - 1), &left);
-
-			if (startoff < left.br_startoff + left.br_blockcount)
-				error = -EINVAL;
-		} else if (offset_shift_fsb > got.br_startoff) {
-			/*
-			 * When first extent is shifted, offset_shift_fsb
-			 * should be less than the stating offset of
-			 * the first extent.
-			 */
-			error = -EINVAL;
-		}
-
+	while (nexts++ < num_exts && current_ext < total_extents) {
+		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
+					&current_ext, gotp, cur, &logflags);
 		if (error)
 			goto del_cursor;
 
-		if (cur) {
-			error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-						   got.br_startblock,
-						   got.br_blockcount,
-						   &i);
-			if (error)
-				goto del_cursor;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-		}
-
-		/* Check if we can merge 2 adjacent extents */
-		if (*current_ext &&
-		    left.br_startoff + left.br_blockcount == startoff &&
-		    left.br_startblock + left.br_blockcount ==
-				got.br_startblock &&
-		    left.br_state == got.br_state &&
-		    left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
-			blockcount = left.br_blockcount +
-				got.br_blockcount;
-			xfs_iext_remove(ip, *current_ext, 1, 0);
-			logflags |= XFS_ILOG_CORE;
-			if (cur) {
-				error = xfs_btree_delete(cur, &i);
-				if (error)
-					goto del_cursor;
-				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-			} else {
-				logflags |= XFS_ILOG_DEXT;
-			}
-			XFS_IFORK_NEXT_SET(ip, whichfork,
-				XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-			gotp = xfs_iext_get_ext(ifp, --*current_ext);
-			xfs_bmbt_get_all(gotp, &got);
-
-			/* Make cursor point to the extent we will update */
-			if (cur) {
-				error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-							   got.br_startblock,
-							   got.br_blockcount,
-							   &i);
-				if (error)
-					goto del_cursor;
-				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-			}
-
-			xfs_bmbt_set_blockcount(gotp, blockcount);
-			got.br_blockcount = blockcount;
-		} else {
-			/* We have to update the startoff */
-			xfs_bmbt_set_startoff(gotp, startoff);
-			got.br_startoff = startoff;
-		}
-
-		logflags |= XFS_ILOG_CORE;
-		if (cur) {
-			error = xfs_bmbt_update(cur, got.br_startoff,
-						got.br_startblock,
-						got.br_blockcount,
-						got.br_state);
-			if (error)
-				goto del_cursor;
-		} else {
-			logflags |= XFS_ILOG_DEXT;
-		}
-
-		(*current_ext)++;
+		/* update total extent count and grab the next record */
 		total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+		if (current_ext >= total_extents)
+			break;
+		gotp = xfs_iext_get_ext(ifp, current_ext);
 	}
 
 	/* Check if we are done */
-	if (*current_ext == total_extents)
+	if (current_ext == total_extents) {
 		*done = 1;
+	} else if (next_fsb) {
+		xfs_bmbt_get_all(gotp, &got);
+		*next_fsb = got.br_startoff;
+	}
 
 del_cursor:
 	if (cur)
@@ -5600,5 +5716,6 @@ del_cursor:
 
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
+
 	return error;
 }

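With the cached extent index gone from the interface, callers drive the shift by file offset: pass the offset to start from, and get the offset to resume at back in *next_fsb once the per-call extent budget is spent. A condensed sketch of the intended per-transaction loop, mirroring the xfs_collapse_file_space() hunk further down (transaction setup, locking and cleanup elided):

	int		done = 0;
	xfs_fileoff_t	next_fsb = XFS_B_TO_FSB(mp, offset + len);
	xfs_fileoff_t	shift_fsb = XFS_B_TO_FSB(mp, len);

	while (!done) {
		/* allocate and reserve tp, ilock and join the inode,
		 * xfs_bmap_init(&free_list, &first_block) ... */
		error = xfs_bmap_shift_extents(tp, ip, next_fsb, shift_fsb,
					       &done, &next_fsb, &first_block,
					       &free_list,
					       XFS_BMAP_MAX_SHIFT_EXTENTS);
		if (error)
			break;
		/* ... xfs_bmap_finish() and commit tp, then loop */
	}
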
+ 3 - 4
fs/xfs/libxfs/xfs_bmap.h

@@ -178,9 +178,8 @@ int	xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
 		xfs_extnum_t num);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-		int *done, xfs_fileoff_t start_fsb,
-		xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
-		xfs_fsblock_t *firstblock, struct xfs_bmap_free	*flist,
-		int num_exts);
+		xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
+		int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
+		struct xfs_bmap_free *flist, int num_exts);
 
 #endif	/* __XFS_BMAP_H__ */

+ 2 - 1
fs/xfs/libxfs/xfs_da_btree.c

@@ -2563,7 +2563,8 @@ xfs_da_get_buf(
 				    mapp, nmap, 0);
 	error = bp ? bp->b_error : -EIO;
 	if (error) {
-		xfs_trans_brelse(trans, bp);
+		if (bp)
+			xfs_trans_brelse(trans, bp);
 		goto out_free;
 	}
 

+ 0 - 1
fs/xfs/libxfs/xfs_da_format.c

@@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype(
 {
 	__uint8_t	ftype = dep->name[dep->namelen];
 
-	ASSERT(ftype < XFS_DIR3_FT_MAX);
 	if (ftype >= XFS_DIR3_FT_MAX)
 		return XFS_DIR3_FT_UNKNOWN;
 	return ftype;

+ 12 - 55
fs/xfs/libxfs/xfs_dir2.c

@@ -237,7 +237,8 @@ xfs_dir_init(
 }
 
 /*
-  Enter a name in a directory.
+ * Enter a name in a directory, or check for available space.
+ * If inum is 0, only the available space test is performed.
  */
 int
 xfs_dir_createname(
@@ -254,10 +255,12 @@ xfs_dir_createname(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
-	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
-	if (rval)
-		return rval;
-	XFS_STATS_INC(xs_dir_create);
+	if (inum) {
+		rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+		if (rval)
+			return rval;
+		XFS_STATS_INC(xs_dir_create);
+	}
 
 	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
 	if (!args)
@@ -276,6 +279,8 @@ xfs_dir_createname(
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+	if (!inum)
+		args->op_flags |= XFS_DA_OP_JUSTCHECK;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_addname(args);
@@ -535,62 +540,14 @@ out_free:
 
 /*
  * See if this entry can be added to the directory without allocating space.
- * First checks that the caller couldn't reserve enough space (resblks = 0).
  */
 int
 xfs_dir_canenter(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*dp,
-	struct xfs_name	*name,		/* name of entry to add */
-	uint		resblks)
+	struct xfs_name	*name)		/* name of entry to add */
 {
-	struct xfs_da_args *args;
-	int		rval;
-	int		v;		/* type-checking value */
-
-	if (resblks)
-		return 0;
-
-	ASSERT(S_ISDIR(dp->i_d.di_mode));
-
-	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-	if (!args)
-		return -ENOMEM;
-
-	args->geo = dp->i_mount->m_dir_geo;
-	args->name = name->name;
-	args->namelen = name->len;
-	args->filetype = name->type;
-	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args->dp = dp;
-	args->whichfork = XFS_DATA_FORK;
-	args->trans = tp;
-	args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
-							XFS_DA_OP_OKNOENT;
-
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-		rval = xfs_dir2_sf_addname(args);
-		goto out_free;
-	}
-
-	rval = xfs_dir2_isblock(args, &v);
-	if (rval)
-		goto out_free;
-	if (v) {
-		rval = xfs_dir2_block_addname(args);
-		goto out_free;
-	}
-
-	rval = xfs_dir2_isleaf(args, &v);
-	if (rval)
-		goto out_free;
-	if (v)
-		rval = xfs_dir2_leaf_addname(args);
-	else
-		rval = xfs_dir2_node_addname(args);
-out_free:
-	kmem_free(args);
-	return rval;
+	return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0);
 }
 
 /*

+ 1 - 1
fs/xfs/libxfs/xfs_dir2.h

@@ -136,7 +136,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				xfs_fsblock_t *first,
 				struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
-				struct xfs_name *name, uint resblks);
+				struct xfs_name *name);
 
 /*
  * Direct call from the bmap code, bypassing the generic directory layer.

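Because the resblks == 0 short-circuit no longer lives inside xfs_dir_canenter(), every caller now gates the call itself; the xfs_inode.c hunks below all convert to the same shape:

	if (!resblks) {
		error = xfs_dir_canenter(tp, dp, name);
		if (error)
			goto out_trans_cancel;
	}
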
+ 4 - 3
fs/xfs/libxfs/xfs_ialloc.c

@@ -1076,8 +1076,8 @@ xfs_dialloc_ag_finobt_newino(
 	int i;
 
 	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
-		error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
-					 &i);
+		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+					 XFS_LOOKUP_EQ, &i);
 		if (error)
 			return error;
 		if (i == 1) {
@@ -1085,7 +1085,6 @@ xfs_dialloc_ag_finobt_newino(
 			if (error)
 				return error;
 			XFS_WANT_CORRUPTED_RETURN(i == 1);
-
 			return 0;
 		}
 	}
@@ -2051,6 +2050,8 @@ xfs_agi_verify(
 	if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
 		return false;
 
+	if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
+		return false;
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't

+ 35 - 14
fs/xfs/libxfs/xfs_rtbitmap.c

@@ -424,20 +424,24 @@ xfs_rtfind_forw(
 }
 
 /*
- * Read and modify the summary information for a given extent size,
+ * Read and/or modify the summary information for a given extent size,
  * bitmap block combination.
  * Keeps track of a current summary block, so we don't keep reading
  * it from the buffer cache.
+ *
+ * Summary information is returned in *sum if specified.
+ * If no delta is specified, returns summary only.
  */
 int
-xfs_rtmodify_summary(
-	xfs_mount_t	*mp,		/* file system mount point */
+xfs_rtmodify_summary_int(
+	xfs_mount_t	*mp,		/* file system mount structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
 	int		log,		/* log2 of extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
 	int		delta,		/* change to make to summary info */
 	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
-	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
+	xfs_suminfo_t	*sum)		/* out: summary info for this block */
 {
 	xfs_buf_t	*bp;		/* buffer for the summary block */
 	int		error;		/* error value */
@@ -456,7 +460,7 @@ xfs_rtmodify_summary(
 	/*
 	 * If we have an old buffer, and the block number matches, use that.
 	 */
-	if (rbpp && *rbpp && *rsb == sb)
+	if (*rbpp && *rsb == sb)
 		bp = *rbpp;
 	/*
 	 * Otherwise we have to get the buffer.
@@ -465,7 +469,7 @@ xfs_rtmodify_summary(
 		/*
 		 * If there was an old one, get rid of it first.
 		 */
-		if (rbpp && *rbpp)
+		if (*rbpp)
 			xfs_trans_brelse(tp, *rbpp);
 		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
 		if (error) {
@@ -474,21 +478,38 @@ xfs_rtmodify_summary(
 		/*
 		 * Remember this buffer and block for the next call.
 		 */
-		if (rbpp) {
-			*rbpp = bp;
-			*rsb = sb;
-		}
+		*rbpp = bp;
+		*rsb = sb;
 	}
 	/*
-	 * Point to the summary information, modify and log it.
+	 * Point to the summary information, modify/log it, and/or copy it out.
 	 */
 	sp = XFS_SUMPTR(mp, bp, so);
-	*sp += delta;
-	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
-		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
+	if (delta) {
+		uint first = (uint)((char *)sp - (char *)bp->b_addr);
+
+		*sp += delta;
+		xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
+	}
+	if (sum)
+		*sum = *sp;
 	return 0;
 }
 
+int
+xfs_rtmodify_summary(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	int		log,		/* log2 of extent size */
+	xfs_rtblock_t	bbno,		/* bitmap block number */
+	int		delta,		/* change to make to summary info */
+	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
+{
+	return xfs_rtmodify_summary_int(mp, tp, log, bbno,
+					delta, rbpp, rsb, NULL);
+}
+
 /*
  * Set the given range of bitmap bits to the given value.
  * Do whatever I/O and logging is required.

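Passing a zero delta turns xfs_rtmodify_summary_int() into a pure read: nothing is dirtied or logged, and the current counter is copied out through *sum. A sketch of a hypothetical read-only caller (cached buffer/block pair assumed, as in the existing callers):

	xfs_buf_t	*sumbp = NULL;	/* cached summary buffer */
	xfs_fsblock_t	sumbno;		/* cached summary block number */
	xfs_suminfo_t	sum;

	error = xfs_rtmodify_summary_int(mp, tp, log, bbno, 0,
					 &sumbp, &sumbno, &sum);
	if (!error && sum > 0)
		;	/* a free extent of this size class exists here */
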
+ 7 - 0
fs/xfs/libxfs/xfs_sb.c

@@ -279,11 +279,13 @@ xfs_mount_validate_sb(
 	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
+	    sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
 	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
 	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
 	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
+	    sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE			||
 	    sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
 	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
@@ -443,6 +445,8 @@ __xfs_sb_from_disk(
 	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
 	to->sb_features_log_incompat =
 				be32_to_cpu(from->sb_features_log_incompat);
+	/* crc is only used on disk, not in memory; just init to 0 here. */
+	to->sb_crc = 0;
 	to->sb_pad = 0;
 	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
 	to->sb_lsn = be64_to_cpu(from->sb_lsn);
@@ -548,6 +552,9 @@ xfs_sb_to_disk(
 	if (!fields)
 		return;
 
+	/* We should never write the crc here, it's updated in the IO path */
+	fields &= ~XFS_SB_CRC;
+
 	xfs_sb_quota_to_disk(to, from, &fields);
 	while (fields) {
 		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);

+ 0 - 36
fs/xfs/time.h

@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-	schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-	*tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */

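The shim's replacements are the generic kernel interfaces. In particular, the xfs_inode.c hunk below swaps nanotime() for current_fs_time(), which also rounds the timestamp to the superblock's s_time_gran instead of handing back a raw CURRENT_TIME:

	struct timespec	tv;

	tv = current_fs_time(mp->m_super);	/* was: nanotime(&tv) */
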
+ 21 - 2
fs/xfs/xfs_aops.c

@@ -434,10 +434,22 @@ xfs_start_page_writeback(
 {
 	ASSERT(PageLocked(page));
 	ASSERT(!PageWriteback(page));
-	if (clear_dirty)
+
+	/*
+	 * if the page was not fully cleaned, we need to ensure that the higher
+	 * layers come back to it correctly. That means we need to keep the page
+	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
+	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
+	 * write this page in this writeback sweep will be made.
+	 */
+	if (clear_dirty) {
 		clear_page_dirty_for_io(page);
-	set_page_writeback(page);
+		set_page_writeback(page);
+	} else
+		set_page_writeback_keepwrite(page);
+
 	unlock_page(page);
+
 	/* If no buffers on the page are to be written, finish it here */
 	if (!buffers)
 		end_page_writeback(page);
@@ -548,6 +560,13 @@ xfs_cancel_ioend(
 		do {
 			next_bh = bh->b_private;
 			clear_buffer_async_write(bh);
+			/*
+			 * The unwritten flag is cleared when added to the
+			 * ioend. We're not submitting for I/O so mark the
+			 * buffer unwritten again for next time around.
+			 */
+			if (ioend->io_type == XFS_IO_UNWRITTEN)
+				set_buffer_unwritten(bh);
 			unlock_buffer(bh);
 		} while ((bh = next_bh) != NULL);
 

+ 54 - 72
fs/xfs/xfs_bmap_util.c

@@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes(
 	if (endoff > XFS_ISIZE(ip))
 		endoff = XFS_ISIZE(ip);
 
-	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
-					mp->m_rtdev_targp : mp->m_ddev_targp,
-				  BTOBB(mp->m_sb.sb_blocksize), 0);
-	if (!bp)
-		return -ENOMEM;
-
-	xfs_buf_unlock(bp);
-
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		uint lock_mode;
 
@@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes(
 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
 		if (imap.br_state == XFS_EXT_UNWRITTEN)
 			continue;
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNWRITE(bp);
-		XFS_BUF_READ(bp);
-		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
 
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(read)");
-			break;
-		}
+		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
+				mp->m_rtdev_targp : mp->m_ddev_targp,
+				xfs_fsb_to_db(ip, imap.br_startblock),
+				BTOBB(mp->m_sb.sb_blocksize),
+				0, &bp, NULL);
+		if (error)
+			return error;
+
 		memset(bp->b_addr +
-			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-		      0, lastoffset - offset + 1);
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNREAD(bp);
-		XFS_BUF_WRITE(bp);
-
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			error = -EIO;
-			break;
-		}
-		xfs_buf_iorequest(bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(write)");
-			break;
-		}
+				(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
+		       0, lastoffset - offset + 1);
+
+		error = xfs_bwrite(bp);
+		xfs_buf_relse(bp);
+		if (error)
+			return error;
 	}
-	xfs_buf_free(bp);
 	return error;
 }
 
@@ -1205,6 +1179,7 @@ xfs_free_file_space(
 	xfs_bmap_free_t		free_list;
 	xfs_bmbt_irec_t		imap;
 	xfs_off_t		ioffset;
+	xfs_off_t		iendoffset;
 	xfs_extlen_t		mod=0;
 	xfs_mount_t		*mp;
 	int			nimap;
@@ -1233,12 +1208,13 @@ xfs_free_file_space(
 	inode_dio_wait(VFS_I(ip));
 
 	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
-	ioffset = offset & ~(rounding - 1);
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-					      ioffset, -1);
+	ioffset = round_down(offset, rounding);
+	iendoffset = round_up(offset + len, rounding) - 1;
+	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
+					     iendoffset);
 	if (error)
 		goto out;
-	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
+	truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
 
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
@@ -1392,14 +1368,14 @@ xfs_zero_file_space(
 
 	if (start_boundary < end_boundary - 1) {
 		/*
-		 * punch out delayed allocation blocks and the page cache over
-		 * the conversion range
+		 * Writeback the range to ensure any inode size updates due to
+		 * appending writes make it to disk (otherwise we could just
+		 * punch out the delalloc blocks).
 		 */
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip,
-				XFS_B_TO_FSBT(mp, start_boundary),
-				XFS_B_TO_FSB(mp, end_boundary - start_boundary));
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+				start_boundary, end_boundary - 1);
+		if (error)
+			goto out;
 		truncate_pagecache_range(VFS_I(ip), start_boundary,
 					 end_boundary - 1);
 
@@ -1456,41 +1432,47 @@ xfs_collapse_file_space(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
-	xfs_extnum_t		current_ext = 0;
 	struct xfs_bmap_free	free_list;
 	xfs_fsblock_t		first_block;
 	int			committed;
 	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		next_fsb;
 	xfs_fileoff_t		shift_fsb;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
 	trace_xfs_collapse_file_space(ip);
 
-	start_fsb = XFS_B_TO_FSB(mp, offset + len);
+	next_fsb = XFS_B_TO_FSB(mp, offset + len);
 	shift_fsb = XFS_B_TO_FSB(mp, len);
 
-	/*
-	 * Writeback the entire file and force remove any post-eof blocks. The
-	 * writeback prevents changes to the extent list via concurrent
-	 * writeback and the eofblocks trim prevents the extent shift algorithm
-	 * from running into a post-eof delalloc extent.
-	 *
-	 * XXX: This is a temporary fix until the extent shift loop below is
-	 * converted to use offsets and lookups within the ILOCK rather than
-	 * carrying around the index into the extent list for the next
-	 * iteration.
-	 */
-	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+	error = xfs_free_file_space(ip, offset, len);
 	if (error)
 		return error;
+
+	/*
+	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
+	 * into the accessible region of the file.
+	 */
 	if (xfs_can_free_eofblocks(ip, true)) {
 		error = xfs_free_eofblocks(mp, ip, false);
 		if (error)
 			return error;
 	}
 
-	error = xfs_free_file_space(ip, offset, len);
+	/*
+	 * Writeback and invalidate cache for the remainder of the file as we're
+	 * about to shift down every extent from the collapse range to EOF. The
+	 * free of the collapse range above might have already done some of
+	 * this, but we shouldn't rely on it to do anything outside of the range
+	 * that was freed.
+	 */
+	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+					     offset + len, -1);
+	if (error)
+		return error;
+	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+					(offset + len) >> PAGE_CACHE_SHIFT, -1);
 	if (error)
 		return error;
 
@@ -1525,10 +1507,10 @@ xfs_collapse_file_space(
 		 * We are using the write transaction in which max 2 bmbt
 		 * updates are allowed
 		 */
-		error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
-					       shift_fsb, &current_ext,
-					       &first_block, &free_list,
-					       XFS_BMAP_MAX_SHIFT_EXTENTS);
+		start_fsb = next_fsb;
+		error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
+				&done, &next_fsb, &first_block, &free_list,
+				XFS_BMAP_MAX_SHIFT_EXTENTS);
 		if (error)
 			goto out;
 
@@ -1638,7 +1620,7 @@ xfs_swap_extents_check_format(
 	return 0;
 }
 
-int
+static int
 xfs_swap_extent_flush(
 	struct xfs_inode	*ip)
 {

+ 178 - 177
fs/xfs/xfs_buf.c

@@ -623,10 +623,11 @@ _xfs_buf_read(
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	xfs_buf_iorequest(bp);
-	if (flags & XBF_ASYNC)
+	if (flags & XBF_ASYNC) {
+		xfs_buf_submit(bp);
 		return 0;
-	return xfs_buf_iowait(bp);
+	}
+	return xfs_buf_submit_wait(bp);
 }
 
 xfs_buf_t *
@@ -687,34 +688,39 @@ xfs_buf_readahead_map(
  * Read an uncached buffer from disk. Allocates and returns a locked
  * buffer containing the disk contents or nothing.
  */
-struct xfs_buf *
+int
 xfs_buf_read_uncached(
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		daddr,
 	size_t			numblks,
 	int			flags,
+	struct xfs_buf		**bpp,
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
 
+	*bpp = NULL;
+
 	bp = xfs_buf_get_uncached(target, numblks, flags);
 	if (!bp)
-		return NULL;
+		return -ENOMEM;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
-	bp->b_bn = daddr;
+	bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
 	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+	xfs_buf_submit_wait(bp);
+	if (bp->b_error) {
+		int	error = bp->b_error;
 		xfs_buf_relse(bp);
-		return NULL;
+		return error;
 	}
-	xfs_buf_iorequest(bp);
-	xfs_buf_iowait(bp);
-	return bp;
+
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
  *	Buffer Utility Routines
  */
 
-STATIC void
-xfs_buf_iodone_work(
-	struct work_struct	*work)
+void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
 {
-	struct xfs_buf		*bp =
-		container_of(work, xfs_buf_t, b_iodone_work);
-	bool			read = !!(bp->b_flags & XBF_READ);
+	bool		read = bp->b_flags & XBF_READ;
+
+	trace_xfs_buf_iodone(bp, _RET_IP_);
 
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
 
-	/* only validate buffers that were read without errors */
-	if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
+	/*
+	 * Pull in IO completion errors now. We are guaranteed to be running
+	 * single threaded, so we don't need the lock to read b_io_error.
+	 */
+	if (!bp->b_error && bp->b_io_error)
+		xfs_buf_ioerror(bp, bp->b_io_error);
+
+	/* Only validate buffers that were read without errors */
+	if (read && !bp->b_error && bp->b_ops) {
+		ASSERT(!bp->b_iodone);
 		bp->b_ops->verify_read(bp);
+	}
+
+	if (!bp->b_error)
+		bp->b_flags |= XBF_DONE;
 
 	if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
 	else if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
-	else {
-		ASSERT(read && bp->b_ops);
+	else
 		complete(&bp->b_iowait);
-	}
 }
 
-void
-xfs_buf_ioend(
-	struct xfs_buf	*bp,
-	int		schedule)
+static void
+xfs_buf_ioend_work(
+	struct work_struct	*work)
 {
-	bool		read = !!(bp->b_flags & XBF_READ);
-
-	trace_xfs_buf_iodone(bp, _RET_IP_);
+	struct xfs_buf		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
 
-	if (bp->b_error == 0)
-		bp->b_flags |= XBF_DONE;
+	xfs_buf_ioend(bp);
+}
 
-	if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
-		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-		} else {
-			xfs_buf_iodone_work(&bp->b_iodone_work);
-		}
-	} else {
-		bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-		complete(&bp->b_iowait);
-	}
+void
+xfs_buf_ioend_async(
+	struct xfs_buf	*bp)
+{
+	INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
+	queue_work(xfslogd_workqueue, &bp->b_iodone_work);
 }
 
 void
@@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned, we aren't flushing it.
-	 */
-	xfs_buf_ioerror(bp, -EIO);
-
-	/*
-	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDONE(bp);
-	xfs_buf_stale(bp);
-
-	xfs_buf_ioend(bp, 0);
-
-	return -EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-int
-xfs_bioerror_relse(
-	struct xfs_buf	*bp)
-{
-	int64_t		fl = bp->b_flags;
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_DONE(bp);
-	xfs_buf_stale(bp);
-	bp->b_iodone = NULL;
-	if (!(fl & XBF_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		xfs_buf_ioerror(bp, -EIO);
-		complete(&bp->b_iowait);
-	} else {
-		xfs_buf_relse(bp);
-	}
-
-	return -EIO;
-}
-
-STATIC int
-xfs_bdstrat_cb(
-	struct xfs_buf	*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		/*
-		 * Metadata write that didn't get logged but
-		 * written delayed anyway. These aren't associated
-		 * with a transaction, and can be ignored.
-		 */
-		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-			return xfs_bioerror_relse(bp);
-		else
-			return xfs_bioerror(bp);
-	}
-
-	xfs_buf_iorequest(bp);
-	return 0;
-}
-
 int
 xfs_bwrite(
 	struct xfs_buf		*bp)
@@ -1166,11 +1085,10 @@ xfs_bwrite(
 	ASSERT(xfs_buf_islocked(bp));
 
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
+			 XBF_WRITE_FAIL | XBF_DONE);
 
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
 		xfs_force_shutdown(bp->b_target->bt_mount,
 				   SHUTDOWN_META_IO_ERROR);
@@ -1178,15 +1096,6 @@ xfs_bwrite(
 	return error;
 }
 
-STATIC void
-_xfs_buf_ioend(
-	xfs_buf_t		*bp,
-	int			schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp, schedule);
-}
-
 STATIC void
 xfs_buf_bio_end_io(
 	struct bio		*bio,
@@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (!bp->b_error)
-		xfs_buf_ioerror(bp, error);
+	if (error) {
+		spin_lock(&bp->b_lock);
+		if (!bp->b_io_error)
+			bp->b_io_error = error;
+		spin_unlock(&bp->b_lock);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	_xfs_buf_ioend(bp, 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend_async(bp);
 	bio_put(bio);
 }
 
@@ -1283,7 +1197,7 @@ next_chunk:
 	} else {
 		/*
 		 * This is guaranteed not to be the last io reference count
-		 * because the caller (xfs_buf_iorequest) holds a count itself.
+		 * because the caller (xfs_buf_submit) holds a count itself.
 		 */
 		atomic_dec(&bp->b_io_remaining);
 		xfs_buf_ioerror(bp, -EIO);
@@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
 	blk_finish_plug(&plug);
 }
 
+/*
+ * Asynchronous IO submission path. This transfers the buffer lock ownership and
+ * the current reference to the IO. It is not safe to reference the buffer after
+ * a call to this function unless the caller holds an additional reference
+ * itself.
+ */
 void
-xfs_buf_iorequest(
-	xfs_buf_t		*bp)
+xfs_buf_submit(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iorequest(bp, _RET_IP_);
+	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+	ASSERT(bp->b_flags & XBF_ASYNC);
+
+	/* on shutdown we stale and complete the buffer immediately */
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		bp->b_flags &= ~XBF_DONE;
+		xfs_buf_stale(bp);
+		xfs_buf_ioend(bp);
+		return;
+	}
 
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * The caller's reference is released during I/O completion.
+	 * This occurs some time after the last b_io_remaining reference is
+	 * released, so after we drop our IO reference we have to have some
+	 * other reference to ensure the buffer doesn't go away from underneath
+	 * us. Take a direct reference to ensure we have safe access to the
+	 * buffer until we are finished with it.
+	 */
 	xfs_buf_hold(bp);
 
 	/*
-	 * Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling xfs_buf_ioend too early.
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from calling
+	 * xfs_buf_ioend too early.
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
+
 	/*
-	 * If _xfs_buf_ioapply failed, we'll get back here with
-	 * only the reference we took above.  _xfs_buf_ioend will
-	 * drop it to zero, so we'd better not queue it for later,
-	 * or we'll free it before it's done.
+	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
+	 * reference we took above. If we drop it to zero, run completion so
+	 * that we don't return to the caller with completion still pending.
 	 */
-	_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+		if (bp->b_error)
+			xfs_buf_ioend(bp);
+		else
+			xfs_buf_ioend_async(bp);
+	}
 
 	xfs_buf_rele(bp);
+	/* Note: it is not safe to reference bp now we've dropped our ref */
 }
 
 /*
- * Waits for I/O to complete on the buffer supplied.  It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer, in which
- * case nothing will ever complete.  It returns the I/O error code, if any, or
- * 0 if there was no error.
+ * Synchronous buffer IO submission path, read or write.
  */
 int
-xfs_buf_iowait(
-	xfs_buf_t		*bp)
+xfs_buf_submit_wait(
+	struct xfs_buf	*bp)
 {
-	trace_xfs_buf_iowait(bp, _RET_IP_);
+	int		error;
 
-	if (!bp->b_error)
-		wait_for_completion(&bp->b_iowait);
+	trace_xfs_buf_submit_wait(bp, _RET_IP_);
+
+	ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
+
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		xfs_buf_ioerror(bp, -EIO);
+		xfs_buf_stale(bp);
+		bp->b_flags &= ~XBF_DONE;
+		return -EIO;
+	}
+
+	if (bp->b_flags & XBF_WRITE)
+		xfs_buf_wait_unpin(bp);
+
+	/* clear the internal error state to avoid spurious errors */
+	bp->b_io_error = 0;
+
+	/*
+	 * For synchronous IO, the IO does not inherit the submitters reference
+	 * count, nor the buffer lock. Hence we cannot release the reference we
+	 * are about to take until we've waited for all IO completion to occur,
+	 * including any xfs_buf_ioend_async() work that may be pending.
+	 */
+	xfs_buf_hold(bp);
+
+	/*
+	 * Set the count to 1 initially, this will stop an I/O completion
+	 * callout which happens before we have started all the I/O from calling
+	 * xfs_buf_ioend too early.
+	 */
+	atomic_set(&bp->b_io_remaining, 1);
+	_xfs_buf_ioapply(bp);
+
+	/*
+	 * make sure we run completion synchronously if it raced with us and is
+	 * already complete.
+	 */
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend(bp);
 
+	/* wait for completion before gathering the error from the buffer */
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+	wait_for_completion(&bp->b_iowait);
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-	return bp->b_error;
+	error = bp->b_error;
+
+	/*
+	 * all done now, we can release the hold that keeps the buffer
+	 * referenced for the entire IO.
+	 */
+	xfs_buf_rele(bp);
+	return error;
 }
 
 xfs_caddr_t
@@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
 	blk_start_plug(&plug);
 	list_for_each_entry_safe(bp, n, io_list, b_list) {
 		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
-		bp->b_flags |= XBF_WRITE;
+		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
 
-		if (!wait) {
-			bp->b_flags |= XBF_ASYNC;
+		/*
+		 * we do all IO submission async. This means if we need to wait
+		 * for IO completion we need to take an extra reference so the
+		 * buffer is still valid on the other side.
+		 */
+		if (wait)
+			xfs_buf_hold(bp);
+		else
 			list_del_init(&bp->b_list);
-		}
-		xfs_bdstrat_cb(bp);
+
+		xfs_buf_submit(bp);
 	}
 	blk_finish_plug(&plug);
 
@@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
 		bp = list_first_entry(&io_list, struct xfs_buf, b_list);
 
 		list_del_init(&bp->b_list);
-		error2 = xfs_buf_iowait(bp);
+
+		/* locking the buffer will wait for async IO completion. */
+		xfs_buf_lock(bp);
+		error2 = bp->b_error;
 		xfs_buf_relse(bp);
 		if (!error)
 			error = error2;
@@ -1884,7 +1885,7 @@ xfs_buf_init(void)
 		goto out;
 
 	xfslogd_workqueue = alloc_workqueue("xfslogd",
-					WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+				WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1);
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 

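The reworked submission API splits along reference-counting rules: xfs_buf_submit() is async only (it asserts XBF_ASYNC) and consumes the caller's buffer lock and reference, while xfs_buf_submit_wait() blocks and returns with the caller still owning both. A condensed sketch of a synchronous read under the new rules, following _xfs_buf_read() above (bp assumed locked and set up for IO):

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
	bp->b_flags |= XBF_READ;

	error = xfs_buf_submit_wait(bp);	/* blocks; returns bp->b_error */
	if (error) {
		xfs_buf_relse(bp);	/* we still hold the lock + reference */
		return error;
	}
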
+ 7 - 8
fs/xfs/xfs_buf.h

@@ -158,6 +158,7 @@ typedef struct xfs_buf {
 	struct list_head	b_lru;		/* lru list */
 	spinlock_t		b_lock;		/* internal state lock */
 	unsigned int		b_state;	/* internal state flags */
+	int			b_io_error;	/* internal IO error state */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
@@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
 
 struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
 				int flags);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target,
-				xfs_daddr_t daddr, size_t numblks, int flags,
-				const struct xfs_buf_ops *ops);
+int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
+			  size_t numblks, int flags, struct xfs_buf **bpp,
+			  const struct xfs_buf_ops *ops);
 void xfs_buf_hold(struct xfs_buf *bp);
 
 /* Releasing Buffers */
@@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
-extern void xfs_buf_ioend(xfs_buf_t *,	int);
+extern void xfs_buf_ioend(struct xfs_buf *bp);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
-extern void xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_submit(struct xfs_buf *bp);
+extern int xfs_buf_submit_wait(struct xfs_buf *bp);
 extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 				xfs_buf_rw_t);
 #define xfs_buf_zero(bp, off, len) \
 	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
-extern int xfs_bioerror_relse(struct xfs_buf *);
-
 /* Buffer Utility Routines */
 extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
 

+ 5 - 5
fs/xfs/xfs_buf_item.c

@@ -491,7 +491,7 @@ xfs_buf_item_unpin(
 		xfs_buf_ioerror(bp, -EIO);
 		XFS_BUF_UNDONE(bp);
 		xfs_buf_stale(bp);
-		xfs_buf_ioend(bp, 0);
+		xfs_buf_ioend(bp);
 	}
 }
 
@@ -501,7 +501,7 @@ xfs_buf_item_unpin(
  * buffer being bad..
  */
 
-DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
 
 STATIC uint
 xfs_buf_item_push(
@@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks(
 	 * a way to shut the filesystem down if the writes keep failing.
 	 *
 	 * In practice we'll shut the filesystem down soon as non-transient
-	 * erorrs tend to affect the whole device and a failing log write
+	 * errors tend to affect the whole device and a failing log write
 	 * will make us give up.  But we really ought to do better here.
 	 */
 	if (XFS_BUF_ISASYNC(bp)) {
@@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks(
 		if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
 			bp->b_flags |= XBF_WRITE | XBF_ASYNC |
 				       XBF_DONE | XBF_WRITE_FAIL;
-			xfs_buf_iorequest(bp);
+			xfs_buf_submit(bp);
 		} else {
 			xfs_buf_relse(bp);
 		}
@@ -1115,7 +1115,7 @@ do_callbacks:
 	xfs_buf_do_callbacks(bp);
 	bp->b_fspriv = NULL;
 	bp->b_iodone = NULL;
-	xfs_buf_ioend(bp, 0);
+	xfs_buf_ioend(bp);
 }
 
 /*

+ 53 - 125
fs/xfs/xfs_file.c

@@ -983,7 +983,7 @@ xfs_vm_page_mkwrite(
 
 /*
  * This type is designed to indicate the type of offset we would like
- * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
+ * to search from page cache for xfs_seek_hole_data().
  */
 enum {
 	HOLE_OFF = 0,
@@ -1040,7 +1040,7 @@ xfs_lookup_buffer_offset(
 /*
  * This routine is called to find out and return a data or hole offset
  * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_data() or xfs_seek_hole().
+ * type for xfs_seek_hole_data().
  *
  * The argument offset is used to tell where we start to search from the
  * page cache.  Map is used to figure out the end points of the range to
@@ -1200,9 +1200,10 @@ out:
 }
 
 STATIC loff_t
-xfs_seek_data(
+xfs_seek_hole_data(
 	struct file		*file,
-	loff_t			start)
+	loff_t			start,
+	int			whence)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1214,6 +1215,9 @@ xfs_seek_data(
 	uint			lock;
 	int			error;
 
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
 	lock = xfs_ilock_data_map_shared(ip);
 
 	isize = i_size_read(inode);
@@ -1228,6 +1232,7 @@ xfs_seek_data(
 	 */
 	fsbno = XFS_B_TO_FSBT(mp, start);
 	end = XFS_B_TO_FSB(mp, isize);
+
 	for (;;) {
 		struct xfs_bmbt_irec	map[2];
 		int			nmap = 2;
@@ -1248,29 +1253,48 @@ xfs_seek_data(
 			offset = max_t(loff_t, start,
 				       XFS_FSB_TO_B(mp, map[i].br_startoff));
 
-			/* Landed in a data extent */
-			if (map[i].br_startblock == DELAYSTARTBLOCK ||
-			    (map[i].br_state == XFS_EXT_NORM &&
-			     !isnullstartblock(map[i].br_startblock)))
+			/* Landed in the hole we wanted? */
+			if (whence == SEEK_HOLE &&
+			    map[i].br_startblock == HOLESTARTBLOCK)
+				goto out;
+
+			/* Landed in the data extent we wanted? */
+			if (whence == SEEK_DATA &&
+			    (map[i].br_startblock == DELAYSTARTBLOCK ||
+			     (map[i].br_state == XFS_EXT_NORM &&
+			      !isnullstartblock(map[i].br_startblock))))
 				goto out;
 
 			/*
-			 * Landed in an unwritten extent, try to search data
-			 * from page cache.
+			 * Landed in an unwritten extent, try to search
+			 * for hole or data from page cache.
 			 */
 			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
 				if (xfs_find_get_desired_pgoff(inode, &map[i],
-							DATA_OFF, &offset))
+				      whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
+							&offset))
 					goto out;
 			}
 		}
 
 		/*
-		 * map[0] is hole or its an unwritten extent but
-		 * without data in page cache.  Probably means that
-		 * we are reading after EOF if nothing in map[1].
+		 * We only received one extent out of the two requested. This
+		 * means we've hit EOF and didn't find what we are looking for.
 		 */
 		if (nmap == 1) {
+			/*
+			 * If we were looking for a hole, set offset to
+			 * the end of the file (i.e., there is an implicit
+			 * hole at the end of any file).
+		 	 */
+			if (whence == SEEK_HOLE) {
+				offset = isize;
+				break;
+			}
+			/*
+			 * If we were looking for data, it's nowhere to be found
+			 */
+			ASSERT(whence == SEEK_DATA);
 			error = -ENXIO;
 			goto out_unlock;
 		}
@@ -1279,125 +1303,30 @@ xfs_seek_data(
 
 		/*
 		 * Nothing was found, proceed to the next round of search
-		 * if reading offset not beyond or hit EOF.
+		 * if the next reading offset is not at or beyond EOF.
 		 */
 		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
 		start = XFS_FSB_TO_B(mp, fsbno);
 		if (start >= isize) {
+			if (whence == SEEK_HOLE) {
+				offset = isize;
+				break;
+			}
+			ASSERT(whence == SEEK_DATA);
 			error = -ENXIO;
 			goto out_unlock;
 		}
 	}
 
-out:
-	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
-	xfs_iunlock(ip, lock);
-
-	if (error)
-		return error;
-	return offset;
-}
-
-STATIC loff_t
-xfs_seek_hole(
-	struct file		*file,
-	loff_t			start)
-{
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	loff_t			uninitialized_var(offset);
-	xfs_fsize_t		isize;
-	xfs_fileoff_t		fsbno;
-	xfs_filblks_t		end;
-	uint			lock;
-	int			error;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	lock = xfs_ilock_data_map_shared(ip);
-
-	isize = i_size_read(inode);
-	if (start >= isize) {
-		error = -ENXIO;
-		goto out_unlock;
-	}
-
-	fsbno = XFS_B_TO_FSBT(mp, start);
-	end = XFS_B_TO_FSB(mp, isize);
-
-	for (;;) {
-		struct xfs_bmbt_irec	map[2];
-		int			nmap = 2;
-		unsigned int		i;
-
-		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
-				       XFS_BMAPI_ENTIRE);
-		if (error)
-			goto out_unlock;
-
-		/* No extents at given offset, must be beyond EOF */
-		if (nmap == 0) {
-			error = -ENXIO;
-			goto out_unlock;
-		}
-
-		for (i = 0; i < nmap; i++) {
-			offset = max_t(loff_t, start,
-				       XFS_FSB_TO_B(mp, map[i].br_startoff));
-
-			/* Landed in a hole */
-			if (map[i].br_startblock == HOLESTARTBLOCK)
-				goto out;
-
-			/*
-			 * Landed in an unwritten extent, try to search hole
-			 * from page cache.
-			 */
-			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
-				if (xfs_find_get_desired_pgoff(inode, &map[i],
-							HOLE_OFF, &offset))
-					goto out;
-			}
-		}
-
-		/*
-		 * map[0] contains data or its unwritten but contains
-		 * data in page cache, probably means that we are
-		 * reading after EOF.  We should fix offset to point
-		 * to the end of the file(i.e., there is an implicit
-		 * hole at the end of any file).
-		 */
-		if (nmap == 1) {
-			offset = isize;
-			break;
-		}
-
-		ASSERT(i > 1);
-
-		/*
-		 * Both mappings contains data, proceed to the next round of
-		 * search if the current reading offset not beyond or hit EOF.
-		 */
-		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
-		start = XFS_FSB_TO_B(mp, fsbno);
-		if (start >= isize) {
-			offset = isize;
-			break;
-		}
-	}
-
 out:
 	/*
-	 * At this point, we must have found a hole.  However, the returned
+	 * If at this point we have found the hole we wanted, the returned
 	 * offset may be bigger than the file size as it may be aligned to
-	 * page boundary for unwritten extents, we need to deal with this
+	 * page boundary for unwritten extents.  We need to deal with this
 	 * situation in particular.
 	 */
-	offset = min_t(loff_t, offset, isize);
+	if (whence == SEEK_HOLE)
+		offset = min_t(loff_t, offset, isize);
 	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 
 out_unlock:
@@ -1412,17 +1341,16 @@ STATIC loff_t
 xfs_file_llseek(
 	struct file	*file,
 	loff_t		offset,
-	int		origin)
+	int		whence)
 {
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 	case SEEK_CUR:
 	case SEEK_SET:
-		return generic_file_llseek(file, offset, origin);
-	case SEEK_DATA:
-		return xfs_seek_data(file, offset);
+		return generic_file_llseek(file, offset, whence);
 	case SEEK_HOLE:
-		return xfs_seek_hole(file, offset);
+	case SEEK_DATA:
+		return xfs_seek_hole_data(file, offset, whence);
 	default:
 		return -EINVAL;
 	}

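Both whence values now funnel through xfs_seek_hole_data(), so a small userspace probe (plain Linux/glibc, nothing XFS-specific) exercises either path:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		int	fd;
		off_t	data, hole;

		if (argc < 2)
			return 1;
		fd = open(argv[1], O_RDONLY);
		if (fd < 0)
			return 1;
		data = lseek(fd, 0, SEEK_DATA);	/* first data at/after 0 */
		hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at/after 0 */
		printf("data: %lld hole: %lld\n",
		       (long long)data, (long long)hole);
		close(fd);
		return 0;
	}
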
+ 3 - 8
fs/xfs/xfs_fsops.c

@@ -172,16 +172,11 @@ xfs_growfs_data_private(
 	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
 		return error;
 	dpct = pct - mp->m_sb.sb_imax_pct;
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp,
+	error = xfs_buf_read_uncached(mp->m_ddev_targp,
 				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
-				XFS_FSS_TO_BB(mp, 1), 0, NULL);
-	if (!bp)
-		return -EIO;
-	if (bp->b_error) {
-		error = bp->b_error;
-		xfs_buf_relse(bp);
+				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error)
 		return error;
-	}
 	xfs_buf_relse(bp);
 
 	new = nb;	/* use new as a temporary here */
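
Every xfs_buf_read_uncached() caller in this series moves to the same error-first pattern. The calling convention — inferred here from the call sites in this diff, not quoted from the header — would look roughly like:

int	xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
			      size_t numblks, int flags, struct xfs_buf **bpp,
			      const struct xfs_buf_ops *ops);

On success *bpp holds a referenced, verified buffer that the caller must xfs_buf_relse(); on failure no buffer is handed back, which is what lets call sites like the one above collapse the old !bp and bp->b_error branches into a single error check.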

+ 4 - 0
fs/xfs/xfs_globals.c

@@ -43,3 +43,7 @@ xfs_param_t xfs_params = {
 	.fstrm_timer	= {	1,		30*100,		3600*100},
 	.eofb_timer	= {	1,		300,		3600*24},
 };
+
+struct xfs_globals xfs_globals = {
+	.log_recovery_delay	=	0,	/* no delay by default */
+};

+ 0 - 1
fs/xfs/xfs_icache.c

@@ -33,7 +33,6 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_bmap_util.h"
-#include "xfs_quota.h"
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 

+ 20 - 14
fs/xfs/xfs_inode.c

@@ -654,7 +654,7 @@ xfs_ialloc(
 	xfs_inode_t	*ip;
 	uint		flags;
 	int		error;
-	timespec_t	tv;
+	struct timespec	tv;
 
 	/*
 	 * Call the space management code to pick
@@ -720,7 +720,7 @@ xfs_ialloc(
 	ip->i_d.di_nextents = 0;
 	ASSERT(ip->i_d.di_nblocks == 0);
 
-	nanotime(&tv);
+	tv = current_fs_time(mp->m_super);
 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
 	ip->i_d.di_atime = ip->i_d.di_mtime;
@@ -769,6 +769,8 @@ xfs_ialloc(
 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 					ip->i_d.di_extsize = pip->i_d.di_extsize;
 				}
+				if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+					di_flags |= XFS_DIFLAG_PROJINHERIT;
 			} else if (S_ISREG(mode)) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_REALTIME;
@@ -789,8 +791,6 @@ xfs_ialloc(
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
 			    xfs_inherit_nosymlinks)
 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
-			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
 			    xfs_inherit_nodefrag)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
@@ -1153,9 +1153,11 @@ xfs_create(
 	if (error)
 		goto out_trans_cancel;
 
-	error = xfs_dir_canenter(tp, dp, name, resblks);
-	if (error)
-		goto out_trans_cancel;
+	if (!resblks) {
+		error = xfs_dir_canenter(tp, dp, name);
+		if (error)
+			goto out_trans_cancel;
+	}
 
 	/*
 	 * A newly created regular or special file just has one directory
@@ -1421,9 +1423,11 @@ xfs_link(
 		goto error_return;
 	}
 
-	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
-	if (error)
-		goto error_return;
+	if (!resblks) {
+		error = xfs_dir_canenter(tp, tdp, target_name);
+		if (error)
+			goto error_return;
+	}
 
 	xfs_bmap_init(&free_list, &first_block);
 
@@ -2759,9 +2763,11 @@ xfs_rename(
 		 * If there's no space reservation, check the entry will
 		 * fit before actually inserting it.
 		 */
-		error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
-		if (error)
-			goto error_return;
+		if (!spaceres) {
+			error = xfs_dir_canenter(tp, target_dp, target_name);
+			if (error)
+				goto error_return;
+		}
 		/*
 		 * If target does not exist and the rename crosses
 		 * directories, adjust the target directory link count
@@ -3056,7 +3062,7 @@ cluster_corrupt_out:
 			XFS_BUF_UNDONE(bp);
 			xfs_buf_stale(bp);
 			xfs_buf_ioerror(bp, -EIO);
-			xfs_buf_ioend(bp, 0);
+			xfs_buf_ioend(bp);
 		} else {
 			xfs_buf_stale(bp);
 			xfs_buf_relse(bp);

+ 1 - 1
fs/xfs/xfs_inode.h

@@ -102,7 +102,7 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
 {
 	xfs_fsize_t i_size = i_size_read(VFS_I(ip));
 
-	if (new_size > i_size)
+	if (new_size > i_size || new_size < 0)
 		new_size = i_size;
 	return new_size > ip->i_d.di_size ? new_size : 0;
 }
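
The added new_size < 0 test closes a signed-overflow window: xfs_fsize_t is a signed 64-bit quantity and callers pass in offset + count from a write, which can wrap past the top of the range. A standalone sketch of the arithmetic being defended against (the offset and count values are purely illustrative; the cast keeps the wrap well-defined outside the kernel, which builds with -fno-strict-overflow):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t offset = INT64_MAX - 1024;	/* near the top of the range */
	int64_t count = 4096;			/* an ordinary write length */
	/* In-kernel this is simply offset + count; it wraps negative. */
	int64_t new_size = (int64_t)((uint64_t)offset + (uint64_t)count);

	printf("new_size = %lld\n", (long long)new_size);	/* negative */
	return 0;
}

With the check in place, a wrapped size is clamped to the current in-core size instead of being trusted.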

+ 1 - 1
fs/xfs/xfs_inode_item.c

@@ -615,7 +615,7 @@ xfs_iflush_done(
 	blip = bp->b_fspriv;
 	prev = NULL;
 	while (blip != NULL) {
-		if (lip->li_cb != xfs_iflush_done) {
+		if (blip->li_cb != xfs_iflush_done) {
 			prev = blip;
 			blip = blip->li_bio_list;
 			continue;

+ 20 - 8
fs/xfs/xfs_ioctl.c

@@ -968,8 +968,6 @@ xfs_set_diflags(
 		di_flags |= XFS_DIFLAG_NOATIME;
 	if (xflags & XFS_XFLAG_NODUMP)
 		di_flags |= XFS_DIFLAG_NODUMP;
-	if (xflags & XFS_XFLAG_PROJINHERIT)
-		di_flags |= XFS_DIFLAG_PROJINHERIT;
 	if (xflags & XFS_XFLAG_NODEFRAG)
 		di_flags |= XFS_DIFLAG_NODEFRAG;
 	if (xflags & XFS_XFLAG_FILESTREAM)
@@ -981,6 +979,8 @@ xfs_set_diflags(
 			di_flags |= XFS_DIFLAG_NOSYMLINKS;
 		if (xflags & XFS_XFLAG_EXTSZINHERIT)
 			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+		if (xflags & XFS_XFLAG_PROJINHERIT)
+			di_flags |= XFS_DIFLAG_PROJINHERIT;
 	} else if (S_ISREG(ip->i_d.di_mode)) {
 		if (xflags & XFS_XFLAG_REALTIME)
 			di_flags |= XFS_DIFLAG_REALTIME;
@@ -1231,13 +1231,25 @@ xfs_ioctl_setattr(
 
 	}
 
-	if (mask & FSX_EXTSIZE)
-		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
 	if (mask & FSX_XFLAGS) {
 		xfs_set_diflags(ip, fa->fsx_xflags);
 		xfs_diflags_to_linux(ip);
 	}
 
+	/*
+	 * Only set the extent size hint if we've already determined that the
+	 * extent size hint should be set on the inode. If no extent size flags
+	 * are set on the inode then unconditionally clear the extent size hint.
+	 */
+	if (mask & FSX_EXTSIZE) {
+		int	extsize = 0;
+
+		if (ip->i_d.di_flags &
+				(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT))
+			extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+		ip->i_d.di_extsize = extsize;
+	}
+
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
@@ -1349,7 +1361,7 @@ xfs_ioc_setxflags(
 STATIC int
 xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
 {
-	struct getbmap __user	*base = *ap;
+	struct getbmap __user	*base = (struct getbmap __user *)*ap;
 
 	/* copy only getbmap portion (not getbmapx) */
 	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
@@ -1380,7 +1392,7 @@ xfs_ioc_getbmap(
 		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
 
 	error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-			    (struct getbmap *)arg+1);
+			    (__force struct getbmap *)arg+1);
 	if (error)
 		return error;
 
@@ -1393,7 +1405,7 @@ xfs_ioc_getbmap(
 STATIC int
 xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
 {
-	struct getbmapx __user	*base = *ap;
+	struct getbmapx __user	*base = (struct getbmapx __user *)*ap;
 
 	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
 		return -EFAULT;
@@ -1420,7 +1432,7 @@ xfs_ioc_getbmapx(
 		return -EINVAL;
 
 	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-			    (struct getbmapx *)arg+1);
+			    (__force struct getbmapx *)arg+1);
 	if (error)
 		return error;
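
The FSX_EXTSIZE rework above means the hint only sticks once an extent size flag is set on the inode, and clearing all such flags now zeroes the hint. A sketch of how userspace would set flag and hint together through the XFS ioctls (assuming the xfsprogs headers; the helper name is hypothetical):

#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* struct fsxattr, XFS_IOC_FS[GS]ETXATTR */

static int set_extsize_hint(int fd, unsigned int bytes)
{
	struct fsxattr fsx;

	if (ioctl(fd, XFS_IOC_FSGETXATTR, &fsx) < 0)
		return -1;
	fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;	/* hint is ignored without a flag */
	fsx.fsx_extsize = bytes;		/* e.g. 1024 * 1024 for a 1 MiB hint */
	return ioctl(fd, XFS_IOC_FSSETXATTR, &fsx);
}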
 

+ 2 - 0
fs/xfs/xfs_ioctl32.c

@@ -160,6 +160,7 @@ xfs_ioctl32_bstat_copyin(
 	    get_user(bstat->bs_gen,	&bstat32->bs_gen)	||
 	    get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
 	    get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+	    get_user(bstat->bs_forkoff,	&bstat32->bs_forkoff)	||
 	    get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask)	||
 	    get_user(bstat->bs_dmstate,	&bstat32->bs_dmstate)	||
 	    get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -214,6 +215,7 @@ xfs_bulkstat_one_fmt_compat(
 	    put_user(buffer->bs_gen,	  &p32->bs_gen)		||
 	    put_user(buffer->bs_projid,	  &p32->bs_projid)	||
 	    put_user(buffer->bs_projid_hi,	&p32->bs_projid_hi)	||
+	    put_user(buffer->bs_forkoff,  &p32->bs_forkoff)	||
 	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)	||
 	    put_user(buffer->bs_dmstate,  &p32->bs_dmstate)	||
 	    put_user(buffer->bs_aextents, &p32->bs_aextents))

+ 2 - 1
fs/xfs/xfs_ioctl32.h

@@ -67,8 +67,9 @@ typedef struct compat_xfs_bstat {
 	__u32		bs_gen;		/* generation count		*/
 	__u16		bs_projid_lo;	/* lower part of project id	*/
 #define	bs_projid	bs_projid_lo	/* (previously just bs_projid)	*/
+	__u16		bs_forkoff;	/* inode fork offset in bytes	*/
 	__u16		bs_projid_hi;	/* high part of project id	*/
-	unsigned char	bs_pad[12];	/* pad space, unused		*/
+	unsigned char	bs_pad[10];	/* pad space, unused		*/
 	__u32		bs_dmevmask;	/* DMIG event mask		*/
 	__u16		bs_dmstate;	/* DMIG state info		*/
 	__u16		bs_aextents;	/* attribute number of extents	*/
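
compat_xfs_bstat is a fixed-layout compat ABI structure, so the new bs_forkoff field is paid for by shrinking bs_pad from 12 to 10 bytes, leaving the total size unchanged. A standalone sketch of that bookkeeping (only the affected tail of the layout is mirrored, and the struct names are purely illustrative):

#include <stdint.h>

struct tail_old {			/* layout before the change */
	uint16_t	bs_projid_lo;
	uint16_t	bs_projid_hi;
	unsigned char	bs_pad[12];
};

struct tail_new {			/* +2 bytes of field, -2 bytes of pad */
	uint16_t	bs_projid_lo;
	uint16_t	bs_forkoff;
	uint16_t	bs_projid_hi;
	unsigned char	bs_pad[10];
};

_Static_assert(sizeof(struct tail_old) == sizeof(struct tail_new),
	       "compat structure size must not change");

int main(void) { return 0; }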

+ 2 - 2
fs/xfs/xfs_iomap.c

@@ -404,8 +404,8 @@ xfs_quota_calc_throttle(
 	int shift = 0;
 	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
 
-	/* over hi wmark, squash the prealloc completely */
-	if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
+	/* no dq, or over hi wmark, squash the prealloc completely */
+	if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
 		*qblocks = 0;
 		*qfreesp = 0;
 		return;

+ 30 - 0
fs/xfs/xfs_iops.c

@@ -849,6 +849,36 @@ xfs_setattr_size(
 		return error;
 	truncate_setsize(inode, newsize);
 
+	/*
+	 * The "we can't serialise against page faults" pain gets worse.
+	 *
+	 * If the file is mapped then we have to clean the page at the old EOF
+	 * when extending the file. Extending the file can expose changes to
+	 * the underlying page mapping (e.g. from beyond EOF to a hole or
+	 * unwritten), and so on the next attempt to write to that page we need
+	 * to remap it for write. i.e. we need .page_mkwrite() to be called.
+	 * Hence we need to clean the page to clean the pte and so a new write
+	 * fault will be triggered appropriately.
+	 *
+	 * If we do it before we change the inode size, then we can race with a
+	 * page fault that maps the page with exactly the same problem. If we do
+	 * it after we change the file size, then a new page fault can come in
+	 * and allocate space before we've run the rest of the truncate
+	 * transaction. That's kinda grotesque, but it's better than having data
+	 * over a hole, and so that's the lesser evil that has been chosen here.
+	 *
+	 * The real solution, however, is to have some mechanism for locking out
+	 * page faults while a truncate is in progress.
+	 */
+	if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
+		error = filemap_write_and_wait_range(
+				VFS_I(ip)->i_mapping,
+				round_down(oldsize, PAGE_CACHE_SIZE),
+				round_up(oldsize, PAGE_CACHE_SIZE) - 1);
+		if (error)
+			return error;
+	}
+
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)

+ 2 - 1
fs/xfs/xfs_itable.c

@@ -639,7 +639,8 @@ next_ag:
 		xfs_buf_relse(agbp);
 		agbp = NULL;
 		agino = 0;
-	} while (++agno < mp->m_sb.sb_agcount);
+		agno++;
+	} while (agno < mp->m_sb.sb_agcount);
 
 	if (!error) {
 		if (bufidx) {

+ 5 - 1
fs/xfs/xfs_linux.h

@@ -56,7 +56,6 @@ typedef __uint64_t __psunsigned_t;
 
 #include "kmem.h"
 #include "mrlock.h"
-#include "time.h"
 #include "uuid.h"
 
 #include <linux/semaphore.h>
@@ -179,6 +178,11 @@ typedef __uint64_t __psunsigned_t;
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 
+static inline void delay(long ticks)
+{
+	schedule_timeout_uninterruptible(ticks);
+}
+
 /*
  * XFS wrapper structure for sysfs support. It depends on external data
  * structures and is embedded in various internal data structures to implement

+ 19 - 40
fs/xfs/xfs_log.c

@@ -1678,7 +1678,7 @@ xlog_bdstrat(
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		xfs_buf_ioerror(bp, -EIO);
 		xfs_buf_stale(bp);
-		xfs_buf_ioend(bp, 0);
+		xfs_buf_ioend(bp);
 		/*
 		 * It would seem logical to return EIO here, but we rely on
 		 * the log state machine to propagate I/O errors instead of
@@ -1688,7 +1688,7 @@ xlog_bdstrat(
 		return 0;
 	}
 
-	xfs_buf_iorequest(bp);
+	xfs_buf_submit(bp);
 	return 0;
 }
 
@@ -3867,18 +3867,17 @@ xlog_state_ioerror(
  * This is called from xfs_force_shutdown, when we're forcibly
  * shutting down the filesystem, typically because of an IO error.
  * Our main objectives here are to make sure that:
- *	a. the filesystem gets marked 'SHUTDOWN' for all interested
+ *	a. if !logerror, flush the logs to disk. Anything modified
+ *	   after this is ignored.
+ *	b. the filesystem gets marked 'SHUTDOWN' for all interested
  *	   parties to find out, 'atomically'.
- *	b. those who're sleeping on log reservations, pinned objects and
+ *	c. those who're sleeping on log reservations, pinned objects and
  *	    other resources get woken up, and be told the bad news.
- *	c. nothing new gets queued up after (a) and (b) are done.
- *	d. if !logerror, flush the iclogs to disk, then seal them off
- *	   for business.
+ *	d. nothing new gets queued up after (b) and (c) are done.
  *
- * Note: for delayed logging the !logerror case needs to flush the regions
- * held in memory out to the iclogs before flushing them to disk. This needs
- * to be done before the log is marked as shutdown, otherwise the flush to the
- * iclogs will fail.
+ * Note: for the !logerror case we need to flush the regions held in memory out
+ * to disk first. This needs to be done before the log is marked as shutdown,
+ * otherwise the iclog writes will fail.
  */
 int
 xfs_log_force_umount(
@@ -3910,16 +3909,16 @@ xfs_log_force_umount(
 		ASSERT(XLOG_FORCED_SHUTDOWN(log));
 		return 1;
 	}
-	retval = 0;
 
 	/*
-	 * Flush the in memory commit item list before marking the log as
-	 * being shut down. We need to do it in this order to ensure all the
-	 * completed transactions are flushed to disk with the xfs_log_force()
-	 * call below.
+	 * Flush all the completed transactions to disk before marking the log
+	 * as being shut down. We need to do it in this order to ensure that
+	 * completed operations are safely on disk before we shut down, and that
+	 * we don't have to issue any buffer IO after the shutdown flags are set
+	 * to guarantee this.
 	 */
 	if (!logerror)
-		xlog_cil_force(log);
+		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
 
 	/*
 	 * mark the filesystem and the log as in a shutdown state and wake
@@ -3931,18 +3930,11 @@ xfs_log_force_umount(
 		XFS_BUF_DONE(mp->m_sb_bp);
 
 	/*
-	 * This flag is sort of redundant because of the mount flag, but
-	 * it's good to maintain the separation between the log and the rest
-	 * of XFS.
+	 * Mark the log and the iclogs with IO error flags to prevent any
+	 * further log IO from being issued or completed.
 	 */
 	log->l_flags |= XLOG_IO_ERROR;
-
-	/*
-	 * If we hit a log error, we want to mark all the iclogs IOERROR
-	 * while we're still holding the loglock.
-	 */
-	if (logerror)
-		retval = xlog_state_ioerror(log);
+	retval = xlog_state_ioerror(log);
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -3955,19 +3947,6 @@ xfs_log_force_umount(
 	xlog_grant_head_wake_all(&log->l_reserve_head);
 	xlog_grant_head_wake_all(&log->l_write_head);
 
-	if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
-		ASSERT(!logerror);
-		/*
-		 * Force the incore logs to disk before shutting the
-		 * log down completely.
-		 */
-		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
-
-		spin_lock(&log->l_icloglock);
-		retval = xlog_state_ioerror(log);
-		spin_unlock(&log->l_icloglock);
-	}
-
 	/*
 	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
 	 * as if the log writes were completed. The abort handling in the log

+ 38 - 9
fs/xfs/xfs_log_cil.c

@@ -463,12 +463,40 @@ xlog_cil_push(
 		spin_unlock(&cil->xc_push_lock);
 		goto out_skip;
 	}
-	spin_unlock(&cil->xc_push_lock);
 
 
 	/* check for a previously pushed sequence */
-	if (push_seq < cil->xc_ctx->sequence)
+	if (push_seq < cil->xc_ctx->sequence) {
+		spin_unlock(&cil->xc_push_lock);
 		goto out_skip;
+	}
+
+	/*
+	 * We are now going to push this context, so add it to the committing
+	 * list before we do anything else. This ensures that anyone waiting on
+	 * this push can easily detect the difference between a "push in
+	 * progress" and "CIL is empty, nothing to do".
+	 *
+	 * IOWs, a wait loop can now check for:
+	 *	the current sequence not being found on the committing list;
+	 *	an empty CIL; and
+	 *	an unchanged sequence number
+	 * to detect a push that had nothing to do and therefore does not need
+	 * waiting on. If the CIL is not empty, we get put on the committing
+	 * list before emptying the CIL and bumping the sequence number. Hence
+	 * an empty CIL and an unchanged sequence number means we jumped out
+	 * above after doing nothing.
+	 *
+	 * Hence the waiter will either find the commit sequence on the
+	 * committing list or the sequence number will be unchanged and the CIL
+	 * still dirty. In that latter case, the push has not yet started, and
+	 * so the waiter will have to continue trying to check the CIL
+	 * committing list until it is found. In extreme cases of delay, the
+	 * sequence may fully commit between the attempts the waiter makes to wait
+	 * on the commit sequence.
+	 */
+	list_add(&ctx->committing, &cil->xc_committing);
+	spin_unlock(&cil->xc_push_lock);
 
 	/*
 	 * pull all the log vectors off the items in the CIL, and
@@ -532,7 +560,6 @@ xlog_cil_push(
 	 */
 	spin_lock(&cil->xc_push_lock);
 	cil->xc_current_sequence = new_ctx->sequence;
-	list_add(&ctx->committing, &cil->xc_committing);
 	spin_unlock(&cil->xc_push_lock);
 	up_write(&cil->xc_ctx_lock);
 
@@ -855,13 +882,15 @@ restart:
 	 * Hence by the time we have got here, our sequence may not have been
 	 * pushed yet. This is true if the current sequence still matches the
 	 * push sequence after the above wait loop and the CIL still contains
-	 * dirty objects.
+	 * dirty objects. This is guaranteed by the push code first adding the
+	 * context to the committing list before emptying the CIL.
 	 *
-	 * When the push occurs, it will empty the CIL and atomically increment
-	 * the currect sequence past the push sequence and move it into the
-	 * committing list. Of course, if the CIL is clean at the time of the
-	 * push, it won't have pushed the CIL at all, so in that case we should
-	 * try the push for this sequence again from the start just in case.
+	 * Hence if we don't find the context in the committing list and the
+	 * current sequence number is unchanged then the CIL contents are
+	 * significant.  If the CIL is empty, it means there was nothing to push
+	 * and that means there is nothing to wait for. If the CIL is not empty,
+	 * it means we haven't yet started the push, because if it had started
+	 * we would have found the context on the committing list.
 	 */
 	if (sequence == cil->xc_current_sequence &&
 	    !list_empty(&cil->xc_cil)) {

+ 360 - 329
fs/xfs/xfs_log_recover.c

@@ -193,12 +193,8 @@ xlog_bread_noalign(
 	bp->b_io_length = nbblks;
 	bp->b_error = 0;
 
-	if (XFS_FORCED_SHUTDOWN(log->l_mp))
-		return -EIO;
-
-	xfs_buf_iorequest(bp);
-	error = xfs_buf_iowait(bp);
-	if (error)
+	error = xfs_buf_submit_wait(bp);
+	if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
 		xfs_buf_ioerror_alert(bp, __func__);
 	return error;
 }
@@ -378,12 +374,14 @@ xlog_recover_iodone(
 		 * We're not going to bother about retrying
 		 * this during recovery. One strike!
 		 */
-		xfs_buf_ioerror_alert(bp, __func__);
-		xfs_force_shutdown(bp->b_target->bt_mount,
-					SHUTDOWN_META_IO_ERROR);
+		if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+			xfs_buf_ioerror_alert(bp, __func__);
+			xfs_force_shutdown(bp->b_target->bt_mount,
+						SHUTDOWN_META_IO_ERROR);
+		}
 	}
 	bp->b_iodone = NULL;
-	xfs_buf_ioend(bp, 0);
+	xfs_buf_ioend(bp);
 }
 
 /*
@@ -1445,160 +1443,6 @@ xlog_clear_stale_blocks(
  ******************************************************************************
  */
 
-STATIC xlog_recover_t *
-xlog_recover_find_tid(
-	struct hlist_head	*head,
-	xlog_tid_t		tid)
-{
-	xlog_recover_t		*trans;
-
-	hlist_for_each_entry(trans, head, r_list) {
-		if (trans->r_log_tid == tid)
-			return trans;
-	}
-	return NULL;
-}
-
-STATIC void
-xlog_recover_new_tid(
-	struct hlist_head	*head,
-	xlog_tid_t		tid,
-	xfs_lsn_t		lsn)
-{
-	xlog_recover_t		*trans;
-
-	trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
-	trans->r_log_tid   = tid;
-	trans->r_lsn	   = lsn;
-	INIT_LIST_HEAD(&trans->r_itemq);
-
-	INIT_HLIST_NODE(&trans->r_list);
-	hlist_add_head(&trans->r_list, head);
-}
-
-STATIC void
-xlog_recover_add_item(
-	struct list_head	*head)
-{
-	xlog_recover_item_t	*item;
-
-	item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
-	INIT_LIST_HEAD(&item->ri_list);
-	list_add_tail(&item->ri_list, head);
-}
-
-STATIC int
-xlog_recover_add_to_cont_trans(
-	struct xlog		*log,
-	struct xlog_recover	*trans,
-	xfs_caddr_t		dp,
-	int			len)
-{
-	xlog_recover_item_t	*item;
-	xfs_caddr_t		ptr, old_ptr;
-	int			old_len;
-
-	if (list_empty(&trans->r_itemq)) {
-		/* finish copying rest of trans header */
-		xlog_recover_add_item(&trans->r_itemq);
-		ptr = (xfs_caddr_t) &trans->r_theader +
-				sizeof(xfs_trans_header_t) - len;
-		memcpy(ptr, dp, len); /* d, s, l */
-		return 0;
-	}
-	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
-
-	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
-	old_len = item->ri_buf[item->ri_cnt-1].i_len;
-
-	ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
-	memcpy(&ptr[old_len], dp, len); /* d, s, l */
-	item->ri_buf[item->ri_cnt-1].i_len += len;
-	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
-	trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
-	return 0;
-}
-
-/*
- * The next region to add is the start of a new region.  It could be
- * a whole region or it could be the first part of a new region.  Because
- * of this, the assumption here is that the type and size fields of all
- * format structures fit into the first 32 bits of the structure.
- *
- * This works because all regions must be 32 bit aligned.  Therefore, we
- * either have both fields or we have neither field.  In the case we have
- * neither field, the data part of the region is zero length.  We only have
- * a log_op_header and can throw away the header since a new one will appear
- * later.  If we have at least 4 bytes, then we can determine how many regions
- * will appear in the current log item.
- */
-STATIC int
-xlog_recover_add_to_trans(
-	struct xlog		*log,
-	struct xlog_recover	*trans,
-	xfs_caddr_t		dp,
-	int			len)
-{
-	xfs_inode_log_format_t	*in_f;			/* any will do */
-	xlog_recover_item_t	*item;
-	xfs_caddr_t		ptr;
-
-	if (!len)
-		return 0;
-	if (list_empty(&trans->r_itemq)) {
-		/* we need to catch log corruptions here */
-		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
-			xfs_warn(log->l_mp, "%s: bad header magic number",
-				__func__);
-			ASSERT(0);
-			return -EIO;
-		}
-		if (len == sizeof(xfs_trans_header_t))
-			xlog_recover_add_item(&trans->r_itemq);
-		memcpy(&trans->r_theader, dp, len); /* d, s, l */
-		return 0;
-	}
-
-	ptr = kmem_alloc(len, KM_SLEEP);
-	memcpy(ptr, dp, len);
-	in_f = (xfs_inode_log_format_t *)ptr;
-
-	/* take the tail entry */
-	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
-	if (item->ri_total != 0 &&
-	     item->ri_total == item->ri_cnt) {
-		/* tail item is in use, get a new one */
-		xlog_recover_add_item(&trans->r_itemq);
-		item = list_entry(trans->r_itemq.prev,
-					xlog_recover_item_t, ri_list);
-	}
-
-	if (item->ri_total == 0) {		/* first region to be added */
-		if (in_f->ilf_size == 0 ||
-		    in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
-			xfs_warn(log->l_mp,
-		"bad number of regions (%d) in inode log format",
-				  in_f->ilf_size);
-			ASSERT(0);
-			kmem_free(ptr);
-			return -EIO;
-		}
-
-		item->ri_total = in_f->ilf_size;
-		item->ri_buf =
-			kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
-				    KM_SLEEP);
-	}
-	ASSERT(item->ri_total > item->ri_cnt);
-	/* Description region is ri_buf[0] */
-	item->ri_buf[item->ri_cnt].i_addr = ptr;
-	item->ri_buf[item->ri_cnt].i_len  = len;
-	item->ri_cnt++;
-	trace_xfs_log_recover_item_add(log, trans, item, 0);
-	return 0;
-}
-
 /*
  * Sort the log items in the transaction.
  *
@@ -3254,31 +3098,6 @@ xlog_recover_do_icreate_pass2(
 	return 0;
 }
 
-/*
- * Free up any resources allocated by the transaction
- *
- * Remember that EFIs, EFDs, and IUNLINKs are handled later.
- */
-STATIC void
-xlog_recover_free_trans(
-	struct xlog_recover	*trans)
-{
-	xlog_recover_item_t	*item, *n;
-	int			i;
-
-	list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
-		/* Free the regions in the item. */
-		list_del(&item->ri_list);
-		for (i = 0; i < item->ri_cnt; i++)
-			kmem_free(item->ri_buf[i].i_addr);
-		/* Free the item itself */
-		kmem_free(item->ri_buf);
-		kmem_free(item);
-	}
-	/* Free the transaction recover structure */
-	kmem_free(trans);
-}
-
 STATIC void
 xlog_recover_buffer_ra_pass2(
 	struct xlog                     *log,
@@ -3528,21 +3347,308 @@ out:
 	if (!list_empty(&done_list))
 		list_splice_init(&done_list, &trans->r_itemq);
 
-	xlog_recover_free_trans(trans);
-
 	error2 = xfs_buf_delwri_submit(&buffer_list);
 	return error ? error : error2;
 }
 
+STATIC void
+xlog_recover_add_item(
+	struct list_head	*head)
+{
+	xlog_recover_item_t	*item;
+
+	item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+	INIT_LIST_HEAD(&item->ri_list);
+	list_add_tail(&item->ri_list, head);
+}
+
 STATIC int
-xlog_recover_unmount_trans(
-	struct xlog		*log)
+xlog_recover_add_to_cont_trans(
+	struct xlog		*log,
+	struct xlog_recover	*trans,
+	xfs_caddr_t		dp,
+	int			len)
 {
-	/* Do nothing now */
-	xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+	xlog_recover_item_t	*item;
+	xfs_caddr_t		ptr, old_ptr;
+	int			old_len;
+
+	if (list_empty(&trans->r_itemq)) {
+		/* finish copying rest of trans header */
+		xlog_recover_add_item(&trans->r_itemq);
+		ptr = (xfs_caddr_t) &trans->r_theader +
+				sizeof(xfs_trans_header_t) - len;
+		memcpy(ptr, dp, len);
+		return 0;
+	}
+	/* take the tail entry */
+	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+
+	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
+	old_len = item->ri_buf[item->ri_cnt-1].i_len;
+
+	ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
+	memcpy(&ptr[old_len], dp, len);
+	item->ri_buf[item->ri_cnt-1].i_len += len;
+	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+	trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
+	return 0;
+}
+
+/*
+ * The next region to add is the start of a new region.  It could be
+ * a whole region or it could be the first part of a new region.  Because
+ * of this, the assumption here is that the type and size fields of all
+ * format structures fit into the first 32 bits of the structure.
+ *
+ * This works because all regions must be 32 bit aligned.  Therefore, we
+ * either have both fields or we have neither field.  In the case we have
+ * neither field, the data part of the region is zero length.  We only have
+ * a log_op_header and can throw away the header since a new one will appear
+ * later.  If we have at least 4 bytes, then we can determine how many regions
+ * will appear in the current log item.
+ */
+STATIC int
+xlog_recover_add_to_trans(
+	struct xlog		*log,
+	struct xlog_recover	*trans,
+	xfs_caddr_t		dp,
+	int			len)
+{
+	xfs_inode_log_format_t	*in_f;			/* any will do */
+	xlog_recover_item_t	*item;
+	xfs_caddr_t		ptr;
+
+	if (!len)
+		return 0;
+	if (list_empty(&trans->r_itemq)) {
+		/* we need to catch log corruptions here */
+		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+			xfs_warn(log->l_mp, "%s: bad header magic number",
+				__func__);
+			ASSERT(0);
+			return -EIO;
+		}
+		if (len == sizeof(xfs_trans_header_t))
+			xlog_recover_add_item(&trans->r_itemq);
+		memcpy(&trans->r_theader, dp, len);
+		return 0;
+	}
+
+	ptr = kmem_alloc(len, KM_SLEEP);
+	memcpy(ptr, dp, len);
+	in_f = (xfs_inode_log_format_t *)ptr;
+
+	/* take the tail entry */
+	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+	if (item->ri_total != 0 &&
+	     item->ri_total == item->ri_cnt) {
+		/* tail item is in use, get a new one */
+		xlog_recover_add_item(&trans->r_itemq);
+		item = list_entry(trans->r_itemq.prev,
+					xlog_recover_item_t, ri_list);
+	}
+
+	if (item->ri_total == 0) {		/* first region to be added */
+		if (in_f->ilf_size == 0 ||
+		    in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+			xfs_warn(log->l_mp,
+		"bad number of regions (%d) in inode log format",
+				  in_f->ilf_size);
+			ASSERT(0);
+			kmem_free(ptr);
+			return -EIO;
+		}
+
+		item->ri_total = in_f->ilf_size;
+		item->ri_buf =
+			kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+				    KM_SLEEP);
+	}
+	ASSERT(item->ri_total > item->ri_cnt);
+	/* Description region is ri_buf[0] */
+	item->ri_buf[item->ri_cnt].i_addr = ptr;
+	item->ri_buf[item->ri_cnt].i_len  = len;
+	item->ri_cnt++;
+	trace_xfs_log_recover_item_add(log, trans, item, 0);
 	return 0;
 }
 
+/*
+ * Free up any resources allocated by the transaction
+ *
+ * Remember that EFIs, EFDs, and IUNLINKs are handled later.
+ */
+STATIC void
+xlog_recover_free_trans(
+	struct xlog_recover	*trans)
+{
+	xlog_recover_item_t	*item, *n;
+	int			i;
+
+	list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+		/* Free the regions in the item. */
+		list_del(&item->ri_list);
+		for (i = 0; i < item->ri_cnt; i++)
+			kmem_free(item->ri_buf[i].i_addr);
+		/* Free the item itself */
+		kmem_free(item->ri_buf);
+		kmem_free(item);
+	}
+	/* Free the transaction recover structure */
+	kmem_free(trans);
+}
+
+/*
+ * On error or completion, trans is freed.
+ */
+STATIC int
+xlog_recovery_process_trans(
+	struct xlog		*log,
+	struct xlog_recover	*trans,
+	xfs_caddr_t		dp,
+	unsigned int		len,
+	unsigned int		flags,
+	int			pass)
+{
+	int			error = 0;
+	bool			freeit = false;
+
+	/* mask off ophdr transaction container flags */
+	flags &= ~XLOG_END_TRANS;
+	if (flags & XLOG_WAS_CONT_TRANS)
+		flags &= ~XLOG_CONTINUE_TRANS;
+
+	/*
+	 * Callees must not free the trans structure. We'll decide if we need to
+	 * free it or not based on the operation being done and its result.
+	 */
+	switch (flags) {
+	/* expected flag values */
+	case 0:
+	case XLOG_CONTINUE_TRANS:
+		error = xlog_recover_add_to_trans(log, trans, dp, len);
+		break;
+	case XLOG_WAS_CONT_TRANS:
+		error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
+		break;
+	case XLOG_COMMIT_TRANS:
+		error = xlog_recover_commit_trans(log, trans, pass);
+		/* success or fail, we are now done with this transaction. */
+		freeit = true;
+		break;
+
+	/* unexpected flag values */
+	case XLOG_UNMOUNT_TRANS:
+		/* just skip trans */
+		xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
+		freeit = true;
+		break;
+	case XLOG_START_TRANS:
+	default:
+		xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
+		ASSERT(0);
+		error = -EIO;
+		break;
+	}
+	if (error || freeit)
+		xlog_recover_free_trans(trans);
+	return error;
+}
+
+/*
+ * Lookup the transaction recovery structure associated with the ID in the
+ * current ophdr. If the transaction doesn't exist and the start flag is set in
+ * the ophdr, then allocate a new transaction for future ID matches to find.
+ * Either way, return what we found during the lookup - an existing transaction
+ * or nothing.
+ */
+STATIC struct xlog_recover *
+xlog_recover_ophdr_to_trans(
+	struct hlist_head	rhash[],
+	struct xlog_rec_header	*rhead,
+	struct xlog_op_header	*ohead)
+{
+	struct xlog_recover	*trans;
+	xlog_tid_t		tid;
+	struct hlist_head	*rhp;
+
+	tid = be32_to_cpu(ohead->oh_tid);
+	rhp = &rhash[XLOG_RHASH(tid)];
+	hlist_for_each_entry(trans, rhp, r_list) {
+		if (trans->r_log_tid == tid)
+			return trans;
+	}
+
+	/*
+	 * skip over non-start transaction headers - we could be
+	 * processing slack space before the next transaction starts
+	 */
+	if (!(ohead->oh_flags & XLOG_START_TRANS))
+		return NULL;
+
+	ASSERT(be32_to_cpu(ohead->oh_len) == 0);
+
+	/*
+	 * This is a new transaction so allocate a new recovery container to
+	 * hold the recovery ops that will follow.
+	 */
+	trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+	trans->r_log_tid = tid;
+	trans->r_lsn = be64_to_cpu(rhead->h_lsn);
+	INIT_LIST_HEAD(&trans->r_itemq);
+	INIT_HLIST_NODE(&trans->r_list);
+	hlist_add_head(&trans->r_list, rhp);
+
+	/*
+	 * Nothing more to do for this ophdr. Items to be added to this new
+	 * transaction will be in subsequent ophdr containers.
+	 */
+	return NULL;
+}
+
+STATIC int
+xlog_recover_process_ophdr(
+	struct xlog		*log,
+	struct hlist_head	rhash[],
+	struct xlog_rec_header	*rhead,
+	struct xlog_op_header	*ohead,
+	xfs_caddr_t		dp,
+	xfs_caddr_t		end,
+	int			pass)
+{
+	struct xlog_recover	*trans;
+	unsigned int		len;
+
+	/* Do we understand who wrote this op? */
+	if (ohead->oh_clientid != XFS_TRANSACTION &&
+	    ohead->oh_clientid != XFS_LOG) {
+		xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
+			__func__, ohead->oh_clientid);
+		ASSERT(0);
+		return -EIO;
+	}
+
+	/*
+	 * Check the ophdr contains all the data it is supposed to contain.
+	 */
+	len = be32_to_cpu(ohead->oh_len);
+	if (dp + len > end) {
+		xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
+	if (!trans) {
+		/* nothing to do, so skip over this ophdr */
+		return 0;
+	}
+
+	return xlog_recovery_process_trans(log, trans, dp, len,
+					   ohead->oh_flags, pass);
+}
+
 /*
  * There are two valid states of the r_state field.  0 indicates that the
  * transaction structure is in a normal state.  We have either seen the
@@ -3560,86 +3666,30 @@ xlog_recover_process_data(
 	xfs_caddr_t		dp,
 	int			pass)
 {
-	xfs_caddr_t		lp;
+	struct xlog_op_header	*ohead;
+	xfs_caddr_t		end;
 	int			num_logops;
-	xlog_op_header_t	*ohead;
-	xlog_recover_t		*trans;
-	xlog_tid_t		tid;
 	int			error;
-	unsigned long		hash;
-	uint			flags;
 
-	lp = dp + be32_to_cpu(rhead->h_len);
+	end = dp + be32_to_cpu(rhead->h_len);
 	num_logops = be32_to_cpu(rhead->h_num_logops);
 
 	/* check the log format matches our own - else we can't recover */
 	if (xlog_header_check_recover(log->l_mp, rhead))
 		return -EIO;
 
-	while ((dp < lp) && num_logops) {
-		ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
-		ohead = (xlog_op_header_t *)dp;
-		dp += sizeof(xlog_op_header_t);
-		if (ohead->oh_clientid != XFS_TRANSACTION &&
-		    ohead->oh_clientid != XFS_LOG) {
-			xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
-					__func__, ohead->oh_clientid);
-			ASSERT(0);
-			return -EIO;
-		}
-		tid = be32_to_cpu(ohead->oh_tid);
-		hash = XLOG_RHASH(tid);
-		trans = xlog_recover_find_tid(&rhash[hash], tid);
-		if (trans == NULL) {		   /* not found; add new tid */
-			if (ohead->oh_flags & XLOG_START_TRANS)
-				xlog_recover_new_tid(&rhash[hash], tid,
-					be64_to_cpu(rhead->h_lsn));
-		} else {
-			if (dp + be32_to_cpu(ohead->oh_len) > lp) {
-				xfs_warn(log->l_mp, "%s: bad length 0x%x",
-					__func__, be32_to_cpu(ohead->oh_len));
-				WARN_ON(1);
-				return -EIO;
-			}
-			flags = ohead->oh_flags & ~XLOG_END_TRANS;
-			if (flags & XLOG_WAS_CONT_TRANS)
-				flags &= ~XLOG_CONTINUE_TRANS;
-			switch (flags) {
-			case XLOG_COMMIT_TRANS:
-				error = xlog_recover_commit_trans(log,
-								trans, pass);
-				break;
-			case XLOG_UNMOUNT_TRANS:
-				error = xlog_recover_unmount_trans(log);
-				break;
-			case XLOG_WAS_CONT_TRANS:
-				error = xlog_recover_add_to_cont_trans(log,
-						trans, dp,
-						be32_to_cpu(ohead->oh_len));
-				break;
-			case XLOG_START_TRANS:
-				xfs_warn(log->l_mp, "%s: bad transaction",
-					__func__);
-				ASSERT(0);
-				error = -EIO;
-				break;
-			case 0:
-			case XLOG_CONTINUE_TRANS:
-				error = xlog_recover_add_to_trans(log, trans,
-						dp, be32_to_cpu(ohead->oh_len));
-				break;
-			default:
-				xfs_warn(log->l_mp, "%s: bad flag 0x%x",
-					__func__, flags);
-				ASSERT(0);
-				error = -EIO;
-				break;
-			}
-			if (error) {
-				xlog_recover_free_trans(trans);
-				return error;
-			}
-		}
+	while ((dp < end) && num_logops) {
+
+		ohead = (struct xlog_op_header *)dp;
+		dp += sizeof(*ohead);
+		ASSERT(dp <= end);
+
+		/* errors will abort recovery */
+		error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
+						    dp, end, pass);
+		if (error)
+			return error;
+
 		dp += be32_to_cpu(ohead->oh_len);
 		num_logops--;
 	}
@@ -4132,41 +4182,13 @@ xlog_do_recovery_pass(
 	}
 
 	memset(rhash, 0, sizeof(rhash));
-	if (tail_blk <= head_blk) {
-		for (blk_no = tail_blk; blk_no < head_blk; ) {
-			error = xlog_bread(log, blk_no, hblks, hbp, &offset);
-			if (error)
-				goto bread_err2;
-
-			rhead = (xlog_rec_header_t *)offset;
-			error = xlog_valid_rec_header(log, rhead, blk_no);
-			if (error)
-				goto bread_err2;
-
-			/* blocks in data section */
-			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-			error = xlog_bread(log, blk_no + hblks, bblks, dbp,
-					   &offset);
-			if (error)
-				goto bread_err2;
-
-			error = xlog_unpack_data(rhead, offset, log);
-			if (error)
-				goto bread_err2;
-
-			error = xlog_recover_process_data(log,
-						rhash, rhead, offset, pass);
-			if (error)
-				goto bread_err2;
-			blk_no += bblks + hblks;
-		}
-	} else {
+	blk_no = tail_blk;
+	if (tail_blk > head_blk) {
 		/*
 		 * Perform recovery around the end of the physical log.
 		 * When the head is not on the same cycle number as the tail,
-		 * we can't do a sequential recovery as above.
+		 * we can't do a sequential recovery.
 		 */
-		blk_no = tail_blk;
 		while (blk_no < log->l_logBBsize) {
 			/*
 			 * Check for header wrapping around physical end-of-log
@@ -4280,34 +4302,35 @@ xlog_do_recovery_pass(
 
 		ASSERT(blk_no >= log->l_logBBsize);
 		blk_no -= log->l_logBBsize;
+	}
 
-		/* read first part of physical log */
-		while (blk_no < head_blk) {
-			error = xlog_bread(log, blk_no, hblks, hbp, &offset);
-			if (error)
-				goto bread_err2;
+	/* read first part of physical log */
+	while (blk_no < head_blk) {
+		error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+		if (error)
+			goto bread_err2;
 
-			rhead = (xlog_rec_header_t *)offset;
-			error = xlog_valid_rec_header(log, rhead, blk_no);
-			if (error)
-				goto bread_err2;
+		rhead = (xlog_rec_header_t *)offset;
+		error = xlog_valid_rec_header(log, rhead, blk_no);
+		if (error)
+			goto bread_err2;
 
-			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-			error = xlog_bread(log, blk_no+hblks, bblks, dbp,
-					   &offset);
-			if (error)
-				goto bread_err2;
+		/* blocks in data section */
+		bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
+		error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+				   &offset);
+		if (error)
+			goto bread_err2;
 
-			error = xlog_unpack_data(rhead, offset, log);
-			if (error)
-				goto bread_err2;
+		error = xlog_unpack_data(rhead, offset, log);
+		if (error)
+			goto bread_err2;
 
-			error = xlog_recover_process_data(log, rhash,
-							rhead, offset, pass);
-			if (error)
-				goto bread_err2;
-			blk_no += bblks + hblks;
-		}
+		error = xlog_recover_process_data(log, rhash,
+						rhead, offset, pass);
+		if (error)
+			goto bread_err2;
+		blk_no += bblks + hblks;
 	}
 
  bread_err2:
@@ -4427,16 +4450,12 @@ xlog_do_recover(
 	XFS_BUF_UNASYNC(bp);
 	bp->b_ops = &xfs_sb_buf_ops;
 
-	if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
-		xfs_buf_relse(bp);
-		return -EIO;
-	}
-
-	xfs_buf_iorequest(bp);
-	error = xfs_buf_iowait(bp);
+	error = xfs_buf_submit_wait(bp);
 	if (error) {
-		xfs_buf_ioerror_alert(bp, __func__);
-		ASSERT(0);
+		if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+			xfs_buf_ioerror_alert(bp, __func__);
+			ASSERT(0);
+		}
 		xfs_buf_relse(bp);
 		return error;
 	}
@@ -4509,6 +4528,18 @@ xlog_recover(
 			return -EINVAL;
 		}
 
+		/*
+		 * Delay log recovery if the debug hook is set. This is debug
+		 * instrumentation to coordinate simulation of I/O failures with
+		 * log recovery.
+		 */
+		if (xfs_globals.log_recovery_delay) {
+			xfs_notice(log->l_mp,
+				"Delaying log recovery for %d seconds.",
+				xfs_globals.log_recovery_delay);
+			msleep(xfs_globals.log_recovery_delay * 1000);
+		}
+
 		xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
 				log->l_mp->m_logname ? log->l_mp->m_logname
 						     : "internal");

+ 26 - 32
fs/xfs/xfs_mount.c

@@ -61,8 +61,6 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
 
-extern struct kset *xfs_kset;
-
 /*
  * See if the UUID is unique among mounted XFS filesystems.
  * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -302,21 +300,15 @@ xfs_readsb(
 	 * access to the superblock.
 	 */
 reread:
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
-				   BTOBB(sector_size), 0, buf_ops);
-	if (!bp) {
-		if (loud)
-			xfs_warn(mp, "SB buffer read failed");
-		return -EIO;
-	}
-	if (bp->b_error) {
-		error = bp->b_error;
+	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
+				   BTOBB(sector_size), 0, &bp, buf_ops);
+	if (error) {
 		if (loud)
 			xfs_warn(mp, "SB validate failed with error %d.", error);
 		/* bad CRC means corrupted metadata */
 		if (error == -EFSBADCRC)
 			error = -EFSCORRUPTED;
-		goto release_buf;
+		return error;
 	}
 
 	/*
@@ -546,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp)
  * Check that the data (and log if separate) is an ok size.
  */
 STATIC int
-xfs_check_sizes(xfs_mount_t *mp)
+xfs_check_sizes(
+	struct xfs_mount *mp)
 {
-	xfs_buf_t	*bp;
+	struct xfs_buf	*bp;
 	xfs_daddr_t	d;
+	int		error;
 
 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
 		xfs_warn(mp, "filesystem size mismatch detected");
 		return -EFBIG;
 	}
-	bp = xfs_buf_read_uncached(mp->m_ddev_targp,
+	error = xfs_buf_read_uncached(mp->m_ddev_targp,
 					d - XFS_FSS_TO_BB(mp, 1),
-					XFS_FSS_TO_BB(mp, 1), 0, NULL);
-	if (!bp) {
+					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
 		xfs_warn(mp, "last sector read failed");
-		return -EIO;
+		return error;
 	}
 	xfs_buf_relse(bp);
 
-	if (mp->m_logdev_targp != mp->m_ddev_targp) {
-		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
-		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
-			xfs_warn(mp, "log size mismatch detected");
-			return -EFBIG;
-		}
-		bp = xfs_buf_read_uncached(mp->m_logdev_targp,
+	if (mp->m_logdev_targp == mp->m_ddev_targp)
+		return 0;
+
+	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
+		xfs_warn(mp, "log size mismatch detected");
+		return -EFBIG;
+	}
+	error = xfs_buf_read_uncached(mp->m_logdev_targp,
 					d - XFS_FSB_TO_BB(mp, 1),
-					XFS_FSB_TO_BB(mp, 1), 0, NULL);
-		if (!bp) {
-			xfs_warn(mp, "log device read failed");
-			return -EIO;
-		}
-		xfs_buf_relse(bp);
+					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
+		xfs_warn(mp, "log device read failed");
+		return error;
 	}
+	xfs_buf_relse(bp);
 	return 0;
 }
 
@@ -729,7 +724,6 @@ xfs_mountfs(
 
 	xfs_set_maxicount(mp);
 
-	mp->m_kobj.kobject.kset = xfs_kset;
 	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
 	if (error)
 		goto out;

+ 2 - 1
fs/xfs/xfs_mru_cache.c

@@ -304,7 +304,8 @@ _xfs_mru_cache_reap(
 int
 xfs_mru_cache_init(void)
 {
-	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
+	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
+				WQ_MEM_RECLAIM|WQ_FREEZABLE, 1);
 	if (!xfs_mru_reap_wq)
 		return -ENOMEM;
 	return 0;

+ 1 - 0
fs/xfs/xfs_qm.c

@@ -434,6 +434,7 @@ xfs_qm_dquot_isolate(
 	struct list_head	*item,
 	spinlock_t		*lru_lock,
 	void			*arg)
+		__releases(lru_lock) __acquires(lru_lock)
 {
 	struct xfs_dquot	*dqp = container_of(item,
 						struct xfs_dquot, q_lru);
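
__releases() and __acquires() are sparse lock-context annotations: under `make C=1` they let sparse verify that the isolate callback really does drop and retake lru_lock internally instead of flagging the imbalance as a warning. A generic sketch of the pattern on a hypothetical helper:

#include <linux/spinlock.h>

/* Lock held on entry, dropped for the blocking work, held again on return. */
static void frob_unlocked(spinlock_t *lock)
		__releases(lock) __acquires(lock)
{
	spin_unlock(lock);
	/* ... work that must not run under the lock ... */
	spin_lock(lock);
}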

+ 14 - 71
fs/xfs/xfs_rtalloc.c

@@ -46,7 +46,7 @@
  * Keeps track of a current summary block, so we don't keep reading
  * it from the buffer cache.
  */
-STATIC int				/* error */
+static int
 xfs_rtget_summary(
 	xfs_mount_t	*mp,		/* file system mount structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
@@ -56,60 +56,9 @@ xfs_rtget_summary(
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_suminfo_t	*sum)		/* out: summary info for this block */
 {
-	xfs_buf_t	*bp;		/* buffer for summary block */
-	int		error;		/* error value */
-	xfs_fsblock_t	sb;		/* summary fsblock */
-	int		so;		/* index into the summary file */
-	xfs_suminfo_t	*sp;		/* pointer to returned data */
-
-	/*
-	 * Compute entry number in the summary file.
-	 */
-	so = XFS_SUMOFFS(mp, log, bbno);
-	/*
-	 * Compute the block number in the summary file.
-	 */
-	sb = XFS_SUMOFFSTOBLOCK(mp, so);
-	/*
-	 * If we have an old buffer, and the block number matches, use that.
-	 */
-	if (rbpp && *rbpp && *rsb == sb)
-		bp = *rbpp;
-	/*
-	 * Otherwise we have to get the buffer.
-	 */
-	else {
-		/*
-		 * If there was an old one, get rid of it first.
-		 */
-		if (rbpp && *rbpp)
-			xfs_trans_brelse(tp, *rbpp);
-		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
-		if (error) {
-			return error;
-		}
-		/*
-		 * Remember this buffer and block for the next call.
-		 */
-		if (rbpp) {
-			*rbpp = bp;
-			*rsb = sb;
-		}
-	}
-	/*
-	 * Point to the summary information & copy it out.
-	 */
-	sp = XFS_SUMPTR(mp, bp, so);
-	*sum = *sp;
-	/*
-	 * Drop the buffer if we're not asked to remember it.
-	 */
-	if (!rbpp)
-		xfs_trans_brelse(tp, bp);
-	return 0;
+	return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
 }
 
-
 /*
  * Return whether there are any free extents in the size range given
  * by low and high, for the bitmap block bbno.
@@ -972,16 +921,11 @@ xfs_growfs_rt(
 	/*
 	 * Read in the last block of the device, make sure it exists.
 	 */
-	bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
+	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
 				XFS_FSB_TO_BB(mp, nrblocks - 1),
-				XFS_FSB_TO_BB(mp, 1), 0, NULL);
-	if (!bp)
-		return -EIO;
-	if (bp->b_error) {
-		error = bp->b_error;
-		xfs_buf_relse(bp);
+				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error)
 		return error;
-	}
 	xfs_buf_relse(bp);
 
 	/*
@@ -1235,11 +1179,12 @@ xfs_rtallocate_extent(
  */
 int				/* error */
 xfs_rtmount_init(
-	xfs_mount_t	*mp)	/* file system mount structure */
+	struct xfs_mount	*mp)	/* file system mount structure */
 {
-	xfs_buf_t	*bp;	/* buffer for last block of subvolume */
-	xfs_daddr_t	d;	/* address of last block of subvolume */
-	xfs_sb_t	*sbp;	/* filesystem superblock copy in mount */
+	struct xfs_buf		*bp;	/* buffer for last block of subvolume */
+	struct xfs_sb		*sbp;	/* filesystem superblock copy in mount */
+	xfs_daddr_t		d;	/* address of last block of subvolume */
+	int			error;
 
 	sbp = &mp->m_sb;
 	if (sbp->sb_rblocks == 0)
@@ -1265,14 +1210,12 @@ xfs_rtmount_init(
 			(unsigned long long) mp->m_sb.sb_rblocks);
 		return -EFBIG;
 	}
-	bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
+	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
 					d - XFS_FSB_TO_BB(mp, 1),
-					XFS_FSB_TO_BB(mp, 1), 0, NULL);
-	if (!bp || bp->b_error) {
+					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+	if (error) {
 		xfs_warn(mp, "realtime device size check failed");
-		if (bp)
-			xfs_buf_relse(bp);
-		return -EIO;
+		return error;
 	}
 	xfs_buf_relse(bp);
 	return 0;

+ 4 - 0
fs/xfs/xfs_rtalloc.h

@@ -111,6 +111,10 @@ int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
 		    xfs_rtblock_t *rtblock);
 int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		       xfs_rtblock_t start, xfs_extlen_t len, int val);
+int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
+			     int log, xfs_rtblock_t bbno, int delta,
+			     xfs_buf_t **rbpp, xfs_fsblock_t *rsb,
+			     xfs_suminfo_t *sum);
 int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
 			 xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
 			 xfs_fsblock_t *rsb);

+ 30 - 9
fs/xfs/xfs_super.c

@@ -47,6 +47,7 @@
 #include "xfs_dinode.h"
 #include "xfs_filestream.h"
 #include "xfs_quota.h"
+#include "xfs_sysfs.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -61,7 +62,11 @@
 static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
-struct kset *xfs_kset;
+
+static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
+#ifdef DEBUG
+static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
+#endif
 
 #define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
 #define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
@@ -838,32 +843,32 @@ xfs_init_mount_workqueues(
 	struct xfs_mount	*mp)
 {
 	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
-			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_data_workqueue)
 		goto out;
 
 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
-			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_unwritten_workqueue)
 		goto out_destroy_data_iodone_queue;
 
 	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
-			WQ_MEM_RECLAIM, 0, mp->m_fsname);
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_cil_workqueue)
 		goto out_destroy_unwritten;
 
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-			0, 0, mp->m_fsname);
+			WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_reclaim_workqueue)
 		goto out_destroy_cil;
 
 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-			0, 0, mp->m_fsname);
+			WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_log_workqueue)
 		goto out_destroy_reclaim;
 
 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
-			0, 0, mp->m_fsname);
+			WQ_FREEZABLE, 0, mp->m_fsname);
 	if (!mp->m_eofblocks_workqueue)
 		goto out_destroy_log;
 
@@ -1406,6 +1411,7 @@ xfs_fs_fill_super(
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
 	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
+	mp->m_kobj.kobject.kset = xfs_kset;
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
@@ -1715,7 +1721,8 @@ xfs_init_workqueues(void)
 	 * AGs in all the filesystems mounted. Hence use the default large
 	 * max_active value for this workqueue.
 	 */
-	xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
+	xfs_alloc_wq = alloc_workqueue("xfsalloc",
+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
 	if (!xfs_alloc_wq)
 		return -ENOMEM;
 
@@ -1768,9 +1775,16 @@ init_xfs_fs(void)
 		goto out_sysctl_unregister;
 	}
 
-	error = xfs_qm_init();
+#ifdef DEBUG
+	xfs_dbg_kobj.kobject.kset = xfs_kset;
+	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
 	if (error)
 		goto out_kset_unregister;
+#endif
+
+	error = xfs_qm_init();
+	if (error)
+		goto out_remove_kobj;
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1779,7 +1793,11 @@ init_xfs_fs(void)
 
  out_qm_exit:
 	xfs_qm_exit();
+ out_remove_kobj:
+#ifdef DEBUG
+	xfs_sysfs_del(&xfs_dbg_kobj);
  out_kset_unregister:
+#endif
 	kset_unregister(xfs_kset);
  out_sysctl_unregister:
 	xfs_sysctl_unregister();
@@ -1802,6 +1820,9 @@ exit_xfs_fs(void)
 {
 	xfs_qm_exit();
 	unregister_filesystem(&xfs_fs_type);
+#ifdef DEBUG
+	xfs_sysfs_del(&xfs_dbg_kobj);
+#endif
 	kset_unregister(xfs_kset);
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();

+ 5 - 3
fs/xfs/xfs_symlink.c

@@ -269,9 +269,11 @@ xfs_symlink(
 	/*
 	 * Check for ability to enter directory entry, if no space reserved.
 	 */
-	error = xfs_dir_canenter(tp, dp, link_name, resblks);
-	if (error)
-		goto error_return;
+	if (!resblks) {
+		error = xfs_dir_canenter(tp, dp, link_name);
+		if (error)
+			goto error_return;
+	}
 	/*
 	 * Initialize the bmap freelist prior to calling either
 	 * bmapi or the directory create code.

+ 5 - 0
fs/xfs/xfs_sysctl.h

@@ -92,6 +92,11 @@ enum {
 
 extern xfs_param_t	xfs_params;
 
+struct xfs_globals {
+	int	log_recovery_delay;	/* log recovery delay (secs) */
+};
+extern struct xfs_globals	xfs_globals;
+
 #ifdef CONFIG_SYSCTL
 extern int xfs_sysctl_register(void);
 extern void xfs_sysctl_unregister(void);

+ 74 - 0
fs/xfs/xfs_sysfs.c

@@ -51,6 +51,80 @@ struct kobj_type xfs_mp_ktype = {
 	.release = xfs_sysfs_release,
 };
 
+#ifdef DEBUG
+/* debug */
+
+STATIC ssize_t
+log_recovery_delay_store(
+	const char	*buf,
+	size_t		count,
+	void		*data)
+{
+	int		ret;
+	int		val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val < 0 || val > 60)
+		return -EINVAL;
+
+	xfs_globals.log_recovery_delay = val;
+
+	return count;
+}
+
+STATIC ssize_t
+log_recovery_delay_show(
+	char	*buf,
+	void	*data)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
+}
+XFS_SYSFS_ATTR_RW(log_recovery_delay);
+
+static struct attribute *xfs_dbg_attrs[] = {
+	ATTR_LIST(log_recovery_delay),
+	NULL,
+};
+
+STATIC ssize_t
+xfs_dbg_show(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	char			*buf)
+{
+	struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+	return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0;
+}
+
+STATIC ssize_t
+xfs_dbg_store(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	const char		*buf,
+	size_t			count)
+{
+	struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+	return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0;
+}
+
+static struct sysfs_ops xfs_dbg_ops = {
+	.show = xfs_dbg_show,
+	.store = xfs_dbg_store,
+};
+
+struct kobj_type xfs_dbg_ktype = {
+	.release = xfs_sysfs_release,
+	.sysfs_ops = &xfs_dbg_ops,
+	.default_attrs = xfs_dbg_attrs,
+};
+
+#endif /* DEBUG */
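+
+/*
+ * With the ktype wired up in init_xfs_fs() (see the xfs_super.c hunks above),
+ * the attribute should surface as /sys/fs/xfs/debug/log_recovery_delay on
+ * DEBUG builds - the path is inferred from the kset/kobject names, so treat
+ * it as an assumption. A small userspace sketch for arming the delay from a
+ * crash-recovery test harness:
+ *
+ *	#include <fcntl.h>
+ *	#include <stdio.h>
+ *	#include <unistd.h>
+ *
+ *	static int set_log_recovery_delay(int seconds)
+ *	{
+ *		const char *path = "/sys/fs/xfs/debug/log_recovery_delay";
+ *		char buf[16];
+ *		int fd, len, ret = 0;
+ *
+ *		fd = open(path, O_WRONLY);
+ *		if (fd < 0)
+ *			return -1;
+ *		len = snprintf(buf, sizeof(buf), "%d", seconds);
+ *		if (write(fd, buf, len) != len)	// store rejects < 0 or > 60
+ *			ret = -1;
+ *		close(fd);
+ *		return ret;
+ *	}
+ */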
+
 /* xlog */
 
 STATIC ssize_t

+ 1 - 0
fs/xfs/xfs_sysfs.h

@@ -20,6 +20,7 @@
 #define __XFS_SYSFS_H__
 
 extern struct kobj_type xfs_mp_ktype;	/* xfs_mount */
+extern struct kobj_type xfs_dbg_ktype;	/* debug */
 extern struct kobj_type xfs_log_ktype;	/* xlog */
 
 static inline struct xfs_kobj *

+ 2 - 1
fs/xfs/xfs_trace.h

@@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free);
 DEFINE_BUF_EVENT(xfs_buf_hold);
 DEFINE_BUF_EVENT(xfs_buf_rele);
 DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_submit);
+DEFINE_BUF_EVENT(xfs_buf_submit_wait);
 DEFINE_BUF_EVENT(xfs_buf_bawrite);
 DEFINE_BUF_EVENT(xfs_buf_lock);
 DEFINE_BUF_EVENT(xfs_buf_lock_done);

+ 3 - 13
fs/xfs/xfs_trans_buf.c

@@ -318,20 +318,10 @@ xfs_trans_read_buf_map(
 			XFS_BUF_READ(bp);
 			bp->b_ops = ops;
 
-			/*
-			 * XXX(hch): clean up the error handling here to be less
-			 * of a mess..
-			 */
-			if (XFS_FORCED_SHUTDOWN(mp)) {
-				trace_xfs_bdstrat_shut(bp, _RET_IP_);
-				xfs_bioerror_relse(bp);
-			} else {
-				xfs_buf_iorequest(bp);
-			}
-
-			error = xfs_buf_iowait(bp);
+			error = xfs_buf_submit_wait(bp);
 			if (error) {
-				xfs_buf_ioerror_alert(bp, __func__);
+				if (!XFS_FORCED_SHUTDOWN(mp))
+					xfs_buf_ioerror_alert(bp, __func__);
 				xfs_buf_relse(bp);
 				/*
 				 * We can gracefully recover from most read

+ 1 - 1
fs/xfs/xfs_trans_inode.c

@@ -70,7 +70,7 @@ xfs_trans_ichgtime(
 	int			flags)
 {
 	struct inode		*inode = VFS_I(ip);
-	timespec_t		tv;
+	struct timespec		tv;
 
 	ASSERT(tp);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));