Merge tag 'xfs-4.14-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull XFS updates from Darrick Wong:
 "Here are the changes for xfs for 4.14. Most of these are cleanups and
  fixes for bad behavior, as we're mostly focusing on improving
  reliability this cycle (read: there's potentially a lot of stuff on the
  horizon for 4.15 so better to spend a few weeks killing other bugs
  now).

  Summary:

   - Write unmount record for a ro mount to avoid unnecessary log replay

   - Clean up orphaned inodes when mounting fs readonly

   - Resubmit inode log items when buffer writeback fails to avoid
     umount hang

   - Fix log recovery corruption problems when log headers wrap around
     the end of the log

   - Avoid infinite loop searching for free inodes when inode counters
     are wrong

   - Evict inodes involved with log redo so that we don't leak them
     later

   - Fix a potential race between reclaim and inode cluster freeing

   - Refactor the inode joining code w.r.t. transaction rolling &
     deferred ops

   - Fix a bug where the log doesn't properly deal with dirty buffers
     that are about to become ordered buffers

   - Fix the extent swap code to deal with making dirty buffers ordered
     properly

   - Consolidate page fault handlers

   - Refactor the incore extent manipulation functions to use the iext
     abstractions instead of directly modifying the extent data

   - Disable crashy chattr +/-x until we fix it

   - Don't allow us to set S_DAX for v2 inodes

   - Various cleanups

   - Clarify some documentation

   - Fix a problem where fsync and a log commit race to send the disk a
     flush command, resulting in a small window where power-fail data
     loss could occur

   - Simplify some rmap operations in the fcollapse code

   - Fix some use-after-free problems in async writeback"

* tag 'xfs-4.14-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (44 commits)
  xfs: use kmem_free to free return value of kmem_zalloc
  xfs: open code end_buffer_async_write in xfs_finish_page_writeback
  xfs: don't set v3 xflags for v2 inodes
  xfs: fix compiler warnings
  fsmap: fix documentation of FMR_OF_LAST
  xfs: simplify the rmap code in xfs_bmse_merge
  xfs: remove unused flags arg from xfs_file_iomap_begin_delay
  xfs: fix incorrect log_flushed on fsync
  xfs: disable per-inode DAX flag
  xfs: replace xfs_qm_get_rtblks with a direct call to xfs_bmap_count_leaves
  xfs: rewrite xfs_bmap_count_leaves using xfs_iext_get_extent
  xfs: use xfs_iext_*_extent helpers in xfs_bmap_split_extent_at
  xfs: use xfs_iext_*_extent helpers in xfs_bmap_shift_extents
  xfs: move some code around inside xfs_bmap_shift_extents
  xfs: use xfs_iext_get_extent in xfs_bmap_first_unused
  xfs: switch xfs_bmap_local_to_extents to use xfs_iext_insert
  xfs: add a xfs_iext_update_extent helper
  xfs: consolidate the various page fault handlers
  iomap: return VM_FAULT_* codes from iomap_page_mkwrite
  xfs: relog dirty buffers during swapext bmbt owner change
  ...
Linus Torvalds (8 years ago), commit 5791577963

50 changed files with 1027 additions and 709 deletions:
 fs/inode.c                       |   +1    -0
 fs/internal.h                    |   +0    -1
 fs/iomap.c                       |   +2    -2
 fs/xfs/libxfs/xfs_attr.c         |  +81   -75
 fs/xfs/libxfs/xfs_attr_leaf.c    |   +3    -3
 fs/xfs/libxfs/xfs_attr_remote.c  |  +22   -17
 fs/xfs/libxfs/xfs_bmap.c         | +136  -144
 fs/xfs/libxfs/xfs_bmap_btree.c   |   +1    -0
 fs/xfs/libxfs/xfs_btree.c        |  +18    -9
 fs/xfs/libxfs/xfs_btree.h        |   +2    -1
 fs/xfs/libxfs/xfs_defer.c        |  +10   -19
 fs/xfs/libxfs/xfs_defer.h        |   +2    -3
 fs/xfs/libxfs/xfs_ialloc.c       |  +27   -30
 fs/xfs/libxfs/xfs_inode_fork.c   |  +15    -6
 fs/xfs/libxfs/xfs_inode_fork.h   |   +2    -0
 fs/xfs/libxfs/xfs_refcount.c     |   +1    -1
 fs/xfs/xfs_aops.c                |  +47   -24
 fs/xfs/xfs_attr_inactive.c       |   +3    -3
 fs/xfs/xfs_bmap_item.c           |   +1    -1
 fs/xfs/xfs_bmap_util.c           |  +85   -39
 fs/xfs/xfs_bmap_util.h           |   +1    -0
 fs/xfs/xfs_buf_item.c            |  +90   -47
 fs/xfs/xfs_buf_item.h            |   +4    -1
 fs/xfs/xfs_dquot.c               |   +1    -1
 fs/xfs/xfs_error.c               |   +3    -0
 fs/xfs/xfs_error.h               |   +3    -1
 fs/xfs/xfs_file.c                |  +34   -63
 fs/xfs/xfs_icache.c              |   +5    -5
 fs/xfs/xfs_inode.c               |  +27   -13
 fs/xfs/xfs_inode_item.c          |  +43    -4
 fs/xfs/xfs_ioctl.c               |  +27   -14
 fs/xfs/xfs_iomap.c               |   +4    -6
 fs/xfs/xfs_iops.c                |   +1    -1
 fs/xfs/xfs_log.c                 |  +24    -9
 fs/xfs/xfs_log_recover.c         | +101   -60
 fs/xfs/xfs_qm.c                  |  +12   -32
 fs/xfs/xfs_refcount_item.c       |   +1    -1
 fs/xfs/xfs_reflink.c             |   +7    -4
 fs/xfs/xfs_rtalloc.c             |   +1    -1
 fs/xfs/xfs_super.c               |   +1    -1
 fs/xfs/xfs_symlink.c             |   +3    -2
 fs/xfs/xfs_trace.h               |  +44    -4
 fs/xfs/xfs_trans.c               |   +5   -23
 fs/xfs/xfs_trans.h               |  +12    -5
 fs/xfs/xfs_trans_ail.c           |  +18    -2
 fs/xfs/xfs_trans_buf.c           |  +49   -30
 fs/xfs/xfs_trans_inode.c         |  +14    -0
 fs/xfs/xfs_trans_priv.h          |  +31    -0
 include/linux/fs.h               |   +1    -0
 include/uapi/linux/fsmap.h       |   +1    -1

fs/inode.c  (+1, -0)

@@ -637,6 +637,7 @@ again:
 
 	dispose_list(&dispose);
 }
+EXPORT_SYMBOL_GPL(evict_inodes);
 
 /**
  * invalidate_inodes	- attempt to free all inodes on a superblock

fs/internal.h  (+0, -1)

@@ -132,7 +132,6 @@ static inline bool atime_needs_update_rcu(const struct path *path,
 extern void inode_io_list_del(struct inode *inode);
 
 extern long get_nr_dirty_inodes(void);
-extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);
 
 /*
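
Note: with its declaration moved out of fs/internal.h (the matching +1 lands in
include/linux/fs.h, per the diffstat) and the symbol exported above,
evict_inodes() becomes callable from modular filesystems. A minimal sketch of
the intended use — dropping inodes instantiated while replaying log redo items
so they are not leaked later; the function name and call site here are
illustrative, not the exact XFS code:

#include <linux/fs.h>	/* evict_inodes() is now declared here */

/*
 * Sketch: evict every cached inode on @sb that has no remaining
 * references, so post-recovery unlinked-inode processing starts
 * from a cold cache.
 */
static void example_post_recovery_flush(struct super_block *sb)
{
	evict_inodes(sb);
}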

fs/iomap.c  (+2, -2)

@@ -477,10 +477,10 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 
 	set_page_dirty(page);
 	wait_for_stable_page(page);
-	return 0;
+	return VM_FAULT_LOCKED;
 out_unlock:
 	unlock_page(page);
-	return ret;
+	return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
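
Note: iomap_page_mkwrite() now returns VM_FAULT_* codes directly (VM_FAULT_LOCKED
on success, a translated code on failure), so a filesystem's ->page_mkwrite
handler can pass the result straight back to the fault path instead of
translating errno values itself. A hedged sketch — the ops table name is a
placeholder:

#include <linux/iomap.h>
#include <linux/mm.h>

extern const struct iomap_ops example_iomap_ops;	/* placeholder */

/*
 * Sketch of a ->page_mkwrite handler after this change: no
 * block_page_mkwrite_return() call is needed in the caller anymore.
 */
static int example_page_mkwrite(struct vm_fault *vmf)
{
	return iomap_page_mkwrite(vmf, &example_iomap_ops);
}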
 

fs/xfs/libxfs/xfs_attr.c  (+81, -75)

@@ -328,20 +328,19 @@ xfs_attr_set(
 		 */
 		xfs_defer_init(args.dfops, args.firstblock);
 		error = xfs_attr_shortform_to_leaf(&args);
-		if (!error)
-			error = xfs_defer_finish(&args.trans, args.dfops, dp);
-		if (error) {
-			args.trans = NULL;
-			xfs_defer_cancel(&dfops);
-			goto out;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args.dfops, dp);
+		error = xfs_defer_finish(&args.trans, args.dfops);
+		if (error)
+			goto out_defer_cancel;
 
 		/*
 		 * Commit the leaf transformation.  We'll need another (linked)
 		 * transaction to add the new attribute to the leaf.
 		 */
 
-		error = xfs_trans_roll(&args.trans, dp);
+		error = xfs_trans_roll_inode(&args.trans, dp);
 		if (error)
 			goto out;
 
@@ -373,6 +372,9 @@ xfs_attr_set(
 
 	return error;
 
+out_defer_cancel:
+	xfs_defer_cancel(&dfops);
+	args.trans = NULL;
 out:
 	if (args.trans)
 		xfs_trans_cancel(args.trans);
@@ -593,19 +595,18 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		 */
 		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_node(args);
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops, dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			return error;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 
 		/*
 		 * Commit the current trans (including the inode) and start
 		 * a new one.
 		 */
-		error = xfs_trans_roll(&args->trans, dp);
+		error = xfs_trans_roll_inode(&args->trans, dp);
 		if (error)
 			return error;
 
@@ -620,7 +621,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 	 * Commit the transaction that added the attr name so that
 	 * later routines can manage their own transactions.
 	 */
-	error = xfs_trans_roll(&args->trans, dp);
+	error = xfs_trans_roll_inode(&args->trans, dp);
 	if (error)
 		return error;
 
@@ -684,20 +685,18 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
-			if (!error)
-				error = xfs_defer_finish(&args->trans,
-							args->dfops, dp);
-			if (error) {
-				args->trans = NULL;
-				xfs_defer_cancel(args->dfops);
-				return error;
-			}
+			if (error)
+				goto out_defer_cancel;
+			xfs_defer_ijoin(args->dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops);
+			if (error)
+				goto out_defer_cancel;
 		}
 
 		/*
 		 * Commit the remove and start the next trans in series.
 		 */
-		error = xfs_trans_roll(&args->trans, dp);
+		error = xfs_trans_roll_inode(&args->trans, dp);
 
 	} else if (args->rmtblkno > 0) {
 		/*
@@ -706,6 +705,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		error = xfs_attr3_leaf_clearflag(args);
 	}
 	return error;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	return error;
 }
 
 /*
@@ -747,15 +750,18 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops, dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			return error;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 	}
 	return 0;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	return error;
 }
 
 /*
@@ -872,20 +878,18 @@ restart:
 			state = NULL;
 			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_node(args);
-			if (!error)
-				error = xfs_defer_finish(&args->trans,
-							args->dfops, dp);
-			if (error) {
-				args->trans = NULL;
-				xfs_defer_cancel(args->dfops);
-				goto out;
-			}
+			if (error)
+				goto out_defer_cancel;
+			xfs_defer_ijoin(args->dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops);
+			if (error)
+				goto out_defer_cancel;
 
 			/*
 			 * Commit the node conversion and start the next
 			 * trans in the chain.
 			 */
-			error = xfs_trans_roll(&args->trans, dp);
+			error = xfs_trans_roll_inode(&args->trans, dp);
 			if (error)
 				goto out;
 
@@ -900,13 +904,12 @@ restart:
 		 */
 		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_da3_split(state);
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops, dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			goto out;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 	} else {
 		/*
 		 * Addition succeeded, update Btree hashvals.
@@ -925,7 +928,7 @@ restart:
 	 * Commit the leaf addition or btree split and start the next
 	 * trans in the chain.
 	 */
-	error = xfs_trans_roll(&args->trans, dp);
+	error = xfs_trans_roll_inode(&args->trans, dp);
 	if (error)
 		goto out;
 
@@ -999,20 +1002,18 @@ restart:
 		if (retval && (state->path.active > 1)) {
 			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_da3_join(state);
-			if (!error)
-				error = xfs_defer_finish(&args->trans,
-							args->dfops, dp);
-			if (error) {
-				args->trans = NULL;
-				xfs_defer_cancel(args->dfops);
-				goto out;
-			}
+			if (error)
+				goto out_defer_cancel;
+			xfs_defer_ijoin(args->dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops);
+			if (error)
+				goto out_defer_cancel;
 		}
 
 		/*
 		 * Commit and start the next trans in the chain.
 		 */
-		error = xfs_trans_roll(&args->trans, dp);
+		error = xfs_trans_roll_inode(&args->trans, dp);
 		if (error)
 			goto out;
 
@@ -1032,6 +1033,10 @@ out:
 	if (error)
 		return error;
 	return retval;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	goto out;
 }
 
 /*
@@ -1122,17 +1127,16 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 	if (retval && (state->path.active > 1)) {
 		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_da3_join(state);
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops, dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			goto out;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 		/*
 		 * Commit the Btree join operation and start a new trans.
 		 */
-		error = xfs_trans_roll(&args->trans, dp);
+		error = xfs_trans_roll_inode(&args->trans, dp);
 		if (error)
 			goto out;
 	}
@@ -1156,14 +1160,12 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
-			if (!error)
-				error = xfs_defer_finish(&args->trans,
-							args->dfops, dp);
-			if (error) {
-				args->trans = NULL;
-				xfs_defer_cancel(args->dfops);
-				goto out;
-			}
+			if (error)
+				goto out_defer_cancel;
+			xfs_defer_ijoin(args->dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops);
+			if (error)
+				goto out_defer_cancel;
 		} else
 			xfs_trans_brelse(args->trans, bp);
 	}
@@ -1172,6 +1174,10 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 out:
 	xfs_da_state_free(state);
 	return error;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	goto out;
 }
 
 /*

fs/xfs/libxfs/xfs_attr_leaf.c  (+3, -3)

@@ -2608,7 +2608,7 @@ xfs_attr3_leaf_clearflag(
 	/*
 	 * Commit the flag value change and start the next trans in series.
 	 */
-	return xfs_trans_roll(&args->trans, args->dp);
+	return xfs_trans_roll_inode(&args->trans, args->dp);
 }
 
 /*
@@ -2659,7 +2659,7 @@ xfs_attr3_leaf_setflag(
 	/*
 	 * Commit the flag value change and start the next trans in series.
 	 */
-	return xfs_trans_roll(&args->trans, args->dp);
+	return xfs_trans_roll_inode(&args->trans, args->dp);
 }
 
 /*
@@ -2777,7 +2777,7 @@ xfs_attr3_leaf_flipflags(
 	/*
 	 * Commit the flag value change and start the next trans in series.
 	 */
-	error = xfs_trans_roll(&args->trans, args->dp);
+	error = xfs_trans_roll_inode(&args->trans, args->dp);
 
 	return error;
 }

fs/xfs/libxfs/xfs_attr_remote.c  (+22, -17)

@@ -467,13 +467,12 @@ xfs_attr_rmtval_set(
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
 				  args->total, &map, &nmap, args->dfops);
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops, dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			return error;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 
 		ASSERT(nmap == 1);
 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -484,7 +483,7 @@ xfs_attr_rmtval_set(
 		/*
 		 * Start the next trans in the chain.
 		 */
-		error = xfs_trans_roll(&args->trans, dp);
+		error = xfs_trans_roll_inode(&args->trans, dp);
 		if (error)
 			return error;
 	}
@@ -539,6 +538,10 @@ xfs_attr_rmtval_set(
 	}
 	ASSERT(valuelen == 0);
 	return 0;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	return error;
 }
 
 /*
@@ -609,21 +612,23 @@ xfs_attr_rmtval_remove(
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
 				    args->dfops, &done);
-		if (!error)
-			error = xfs_defer_finish(&args->trans, args->dfops,
-						args->dp);
-		if (error) {
-			args->trans = NULL;
-			xfs_defer_cancel(args->dfops);
-			return error;
-		}
+		if (error)
+			goto out_defer_cancel;
+		xfs_defer_ijoin(args->dfops, args->dp);
+		error = xfs_defer_finish(&args->trans, args->dfops);
+		if (error)
+			goto out_defer_cancel;
 
 		/*
 		 * Close out trans and start the next one in the chain.
 		 */
-		error = xfs_trans_roll(&args->trans, args->dp);
+		error = xfs_trans_roll_inode(&args->trans, args->dp);
 		if (error)
 			return error;
 	}
 	return 0;
+out_defer_cancel:
+	xfs_defer_cancel(args->dfops);
+	args->trans = NULL;
+	return error;
 }

fs/xfs/libxfs/xfs_bmap.c  (+136, -144)

@@ -579,7 +579,7 @@ xfs_bmap_validate_ret(
 
 #else
 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
-#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
+#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
 #endif /* DEBUG */
 
 /*
@@ -880,7 +880,7 @@ xfs_bmap_local_to_extents(
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_alloc_arg_t	args;		/* allocation arguments */
 	xfs_buf_t	*bp;		/* buffer for extent block */
-	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
+	struct xfs_bmbt_irec rec;
 
 	/*
 	 * We don't want to deal with the case of keeping inode data inline yet.
@@ -943,9 +943,12 @@ xfs_bmap_local_to_extents(
 	xfs_bmap_local_to_extents_empty(ip, whichfork);
 	flags |= XFS_ILOG_CORE;
 
-	xfs_iext_add(ifp, 0, 1);
-	ep = xfs_iext_get_ext(ifp, 0);
-	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
+	rec.br_startoff = 0;
+	rec.br_startblock = args.fsbno;
+	rec.br_blockcount = 1;
+	rec.br_state = XFS_EXT_NORM;
+	xfs_iext_insert(ip, 0, 1, &rec, 0);
+
 	trace_xfs_bmap_post_update(ip, 0,
 			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
 			_THIS_IP_);
@@ -1196,7 +1199,7 @@ xfs_bmap_add_attrfork(
 			xfs_log_sb(tp);
 	}
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto bmap_cancel;
 	error = xfs_trans_commit(tp);
@@ -1356,7 +1359,6 @@ xfs_bmap_first_unused(
 	xfs_fileoff_t	lastaddr;		/* last block number seen */
 	xfs_fileoff_t	lowest;			/* lowest useful block */
 	xfs_fileoff_t	max;			/* starting useful block */
-	xfs_fileoff_t	off;			/* offset for this block */
 	xfs_extnum_t	nextents;		/* number of extent entries */
 
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
@@ -1373,16 +1375,19 @@ xfs_bmap_first_unused(
 	lowest = *first_unused;
 	nextents = xfs_iext_count(ifp);
 	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
-		off = xfs_bmbt_get_startoff(ep);
+		struct xfs_bmbt_irec got;
+
+		xfs_iext_get_extent(ifp, idx, &got);
+
 		/*
 		 * See if the hole before this extent will work.
 		 */
-		if (off >= lowest + len && off - max >= len) {
+		if (got.br_startoff >= lowest + len &&
+		    got.br_startoff - max >= len) {
 			*first_unused = max;
 			return 0;
 		}
-		lastaddr = off + xfs_bmbt_get_blockcount(ep);
+		lastaddr = got.br_startoff + got.br_blockcount;
 		max = XFS_FILEOFF_MAX(lastaddr, lowest);
 	}
 	*first_unused = max;
@@ -4918,7 +4923,7 @@ xfs_bmap_del_extent_delay(
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 	case BMAP_RIGHT_CONTIG:
@@ -4930,7 +4935,7 @@ xfs_bmap_del_extent_delay(
 		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
 				got->br_blockcount), da_old);
 		got->br_startblock = nullstartblock((int)da_new);
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 	case 0:
@@ -4956,7 +4961,7 @@ xfs_bmap_del_extent_delay(
 						       del->br_blockcount);
 
 		got->br_startblock = nullstartblock((int)got_indlen);
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
 
 		new.br_startoff = del_endoff;
@@ -5026,7 +5031,7 @@ xfs_bmap_del_extent_cow(
 		got->br_startoff = del_endoff;
 		got->br_blockcount -= del->br_blockcount;
 		got->br_startblock = del->br_startblock + del->br_blockcount;
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 	case BMAP_RIGHT_CONTIG:
@@ -5035,7 +5040,7 @@ xfs_bmap_del_extent_cow(
 		 */
 		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount -= del->br_blockcount;
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 		break;
 	case 0:
@@ -5044,7 +5049,7 @@ xfs_bmap_del_extent_cow(
 		 */
 		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
 		got->br_blockcount = del->br_startoff - got->br_startoff;
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+		xfs_iext_update_extent(ifp, *idx, got);
 		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
 		new.br_startoff = del_endoff;
@@ -5876,32 +5881,26 @@ xfs_bmse_merge(
 	int				whichfork,
 	xfs_fileoff_t			shift,		/* shift fsb */
 	int				current_ext,	/* idx of gotp */
-	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
-	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
+	struct xfs_bmbt_irec		*got,		/* extent to shift */
+	struct xfs_bmbt_irec		*left,		/* preceding extent */
 	struct xfs_btree_cur		*cur,
-	int				*logflags)	/* output */
+	int				*logflags,	/* output */
+	struct xfs_defer_ops		*dfops)
 {
-	struct xfs_bmbt_irec		got;
-	struct xfs_bmbt_irec		left;
+	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, whichfork);
+	struct xfs_bmbt_irec		new;
 	xfs_filblks_t			blockcount;
 	int				error, i;
 	struct xfs_mount		*mp = ip->i_mount;
 
-	xfs_bmbt_get_all(gotp, &got);
-	xfs_bmbt_get_all(leftp, &left);
-	blockcount = left.br_blockcount + got.br_blockcount;
+	blockcount = left->br_blockcount + got->br_blockcount;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(xfs_bmse_can_merge(&left, &got, shift));
+	ASSERT(xfs_bmse_can_merge(left, got, shift));
 
-	/*
-	 * Merge the in-core extents. Note that the host record pointers and
-	 * current_ext index are invalid once the extent has been removed via
-	 * xfs_iext_remove().
-	 */
-	xfs_bmbt_set_blockcount(leftp, blockcount);
-	xfs_iext_remove(ip, current_ext, 1, 0);
+	new = *left;
+	new.br_blockcount = blockcount;
 
 	/*
 	 * Update the on-disk extent count, the btree if necessary and log the
@@ -5912,12 +5911,12 @@ xfs_bmse_merge(
 	*logflags |= XFS_ILOG_CORE;
 	if (!cur) {
 		*logflags |= XFS_ILOG_DEXT;
-		return 0;
+		goto done;
 	}
 
 	/* lookup and remove the extent to merge */
-	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
-				   got.br_blockcount, &i);
+	error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock,
+				   got->br_blockcount, &i);
 	if (error)
 		return error;
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
@@ -5928,16 +5927,28 @@ xfs_bmse_merge(
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
 	/* lookup and update size of the previous extent */
-	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
-				   left.br_blockcount, &i);
+	error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock,
+				   left->br_blockcount, &i);
 	if (error)
 		return error;
 	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
-	left.br_blockcount = blockcount;
+	error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock,
+			        new.br_blockcount, new.br_state);
+	if (error)
+		return error;
+
+done:
+	xfs_iext_update_extent(ifp, current_ext - 1, &new);
+	xfs_iext_remove(ip, current_ext, 1, 0);
 
-	return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
-			       left.br_blockcount, left.br_state);
+	/* update reverse mapping. rmap functions merge the rmaps for us */
+	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
+	if (error)
+		return error;
+	memcpy(&new, got, sizeof(new));
+	new.br_startoff = left->br_startoff + left->br_blockcount;
+	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
 }
 
 /*
@@ -5949,7 +5960,7 @@ xfs_bmse_shift_one(
 	int				whichfork,
 	xfs_fileoff_t			offset_shift_fsb,
 	int				*current_ext,
-	struct xfs_bmbt_rec_host	*gotp,
+	struct xfs_bmbt_irec		*got,
 	struct xfs_btree_cur		*cur,
 	int				*logflags,
 	enum shift_direction		direction,
@@ -5958,9 +5969,7 @@ xfs_bmse_shift_one(
 	struct xfs_ifork		*ifp;
 	struct xfs_mount		*mp;
 	xfs_fileoff_t			startoff;
-	struct xfs_bmbt_rec_host	*adj_irecp;
-	struct xfs_bmbt_irec		got;
-	struct xfs_bmbt_irec		adj_irec;
+	struct xfs_bmbt_irec		adj_irec, new;
 	int				error;
 	int				i;
 	int				total_extents;
@@ -5969,13 +5978,11 @@ xfs_bmse_shift_one(
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	total_extents = xfs_iext_count(ifp);
 
-	xfs_bmbt_get_all(gotp, &got);
-
 	/* delalloc extents should be prevented by caller */
-	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
 
 	if (direction == SHIFT_LEFT) {
-		startoff = got.br_startoff - offset_shift_fsb;
+		startoff = got->br_startoff - offset_shift_fsb;
 
 		/*
 		 * Check for merge if we've got an extent to the left,
@@ -5983,46 +5990,39 @@ xfs_bmse_shift_one(
 		 * of the file for the shift.
 		 */
 		if (!*current_ext) {
-			if (got.br_startoff < offset_shift_fsb)
+			if (got->br_startoff < offset_shift_fsb)
 				return -EINVAL;
 			goto update_current_ext;
 		}
+
 		/*
-		 * grab the left extent and check for a large
-		 * enough hole.
+		 * grab the left extent and check for a large enough hole.
 		 */
-		adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
-		xfs_bmbt_get_all(adj_irecp, &adj_irec);
-
-		if (startoff <
-		    adj_irec.br_startoff + adj_irec.br_blockcount)
+		xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
+		if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
 			return -EINVAL;
 
 		/* check whether to merge the extent or shift it down */
-		if (xfs_bmse_can_merge(&adj_irec, &got,
-				       offset_shift_fsb)) {
-			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-					       *current_ext, gotp, adj_irecp,
-					       cur, logflags);
-			if (error)
-				return error;
-			adj_irec = got;
-			goto update_rmap;
+		if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
+			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
+					      *current_ext, got, &adj_irec,
+					      cur, logflags, dfops);
 		}
 	} else {
-		startoff = got.br_startoff + offset_shift_fsb;
+		startoff = got->br_startoff + offset_shift_fsb;
 		/* nothing to move if this is the last extent */
 		if (*current_ext >= (total_extents - 1))
 			goto update_current_ext;
+
 		/*
 		 * If this is not the last extent in the file, make sure there
 		 * is enough room between current extent and next extent for
 		 * accommodating the shift.
 		 */
-		adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
-		xfs_bmbt_get_all(adj_irecp, &adj_irec);
-		if (startoff + got.br_blockcount > adj_irec.br_startoff)
+		xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
+		if (startoff + got->br_blockcount > adj_irec.br_startoff)
 			return -EINVAL;
+
 		/*
 		 * Unlike a left shift (which involves a hole punch),
 		 * a right shift does not modify extent neighbors
@@ -6030,45 +6030,48 @@ xfs_bmse_shift_one(
 		 * in this scenario. Check anyways and warn if we
 		 * encounter two extents that could be one.
 		 */
-		if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
+		if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
 			WARN_ON_ONCE(1);
 	}
+
 	/*
 	 * Increment the extent index for the next iteration, update the start
 	 * offset of the in-core extent and update the btree if applicable.
 	 */
 update_current_ext:
-	if (direction == SHIFT_LEFT)
-		(*current_ext)++;
-	else
-		(*current_ext)--;
-	xfs_bmbt_set_startoff(gotp, startoff);
 	*logflags |= XFS_ILOG_CORE;
-	adj_irec = got;
-	if (!cur) {
+
+	new = *got;
+	new.br_startoff = startoff;
+
+	if (cur) {
+		error = xfs_bmbt_lookup_eq(cur, got->br_startoff,
+				got->br_startblock, got->br_blockcount, &i);
+		if (error)
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+
+		error = xfs_bmbt_update(cur, new.br_startoff,
+				new.br_startblock, new.br_blockcount,
+				new.br_state);
+		if (error)
+			return error;
+	} else {
 		*logflags |= XFS_ILOG_DEXT;
-		goto update_rmap;
 	}
 
-	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
-				   got.br_blockcount, &i);
-	if (error)
-		return error;
-	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+	xfs_iext_update_extent(ifp, *current_ext, &new);
 
-	got.br_startoff = startoff;
-	error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
-			got.br_blockcount, got.br_state);
-	if (error)
-		return error;
+	if (direction == SHIFT_LEFT)
+		(*current_ext)++;
+	else
+		(*current_ext)--;
 
-update_rmap:
 	/* update reverse mapping */
-	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
+	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
 	if (error)
 		return error;
-	adj_irec.br_startoff = startoff;
-	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
+	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
 }
 
 /*
@@ -6095,7 +6098,6 @@ xfs_bmap_shift_extents(
 	int			num_exts)
 {
 	struct xfs_btree_cur		*cur = NULL;
-	struct xfs_bmbt_rec_host	*gotp;
 	struct xfs_bmbt_irec            got;
 	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_ifork		*ifp;
@@ -6122,7 +6124,6 @@ xfs_bmap_shift_extents(
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
-	ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -6154,10 +6155,26 @@ xfs_bmap_shift_extents(
 	 * In case of first right shift, we need to initialize next_fsb
 	 */
 	if (*next_fsb == NULLFSBLOCK) {
-		gotp = xfs_iext_get_ext(ifp, total_extents - 1);
-		xfs_bmbt_get_all(gotp, &got);
+		ASSERT(direction == SHIFT_RIGHT);
+
+		current_ext = total_extents - 1;
+		xfs_iext_get_extent(ifp, current_ext, &got);
+		if (stop_fsb > got.br_startoff) {
+			*done = 1;
+			goto del_cursor;
+		}
 		*next_fsb = got.br_startoff;
-		if (stop_fsb > *next_fsb) {
+	} else {
+		/*
+		 * Look up the extent index for the fsb where we start shifting. We can
+		 * henceforth iterate with current_ext as extent list changes are locked
+		 * out via ilock.
+		 *
+		 * If next_fsb lies in a hole beyond which there are no extents we are
+		 * done.
+		 */
+		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
+				&got)) {
 			*done = 1;
 			goto del_cursor;
 		}
@@ -6165,37 +6182,26 @@ xfs_bmap_shift_extents(
 
 	/* Lookup the extent index at which we have to stop */
 	if (direction == SHIFT_RIGHT) {
-		gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
+		struct xfs_bmbt_irec s;
+
+		xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s);
 		/* Make stop_extent exclusive of shift range */
 		stop_extent--;
-	} else
+		if (current_ext <= stop_extent) {
+			error = -EIO;
+			goto del_cursor;
+		}
+	} else {
 		stop_extent = total_extents;
-
-	/*
-	 * Look up the extent index for the fsb where we start shifting. We can
-	 * henceforth iterate with current_ext as extent list changes are locked
-	 * out via ilock.
-	 *
-	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
-	 * *next_fsb lies in a hole beyond which there are no extents. Either
-	 * way, we are done.
-	 */
-	gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
-	if (!gotp) {
-		*done = 1;
-		goto del_cursor;
-	}
-
-	/* some sanity checking before we finally start shifting extents */
-	if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
-	     (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
-		error = -EIO;
-		goto del_cursor;
+		if (current_ext >= stop_extent) {
+			error = -EIO;
+			goto del_cursor;
+		}
 	}
 
 	while (nexts++ < num_exts) {
 		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
-					   &current_ext, gotp, cur, &logflags,
+					   &current_ext, &got, cur, &logflags,
 					   direction, dfops);
 		if (error)
 			goto del_cursor;
@@ -6213,13 +6219,11 @@ xfs_bmap_shift_extents(
 			*next_fsb = NULLFSBLOCK;
 			break;
 		}
-		gotp = xfs_iext_get_ext(ifp, current_ext);
+		xfs_iext_get_extent(ifp, current_ext, &got);
 	}
 
-	if (!*done) {
-		xfs_bmbt_get_all(gotp, &got);
+	if (!*done)
 		*next_fsb = got.br_startoff;
-	}
 
 del_cursor:
 	if (cur)
@@ -6248,7 +6252,6 @@ xfs_bmap_split_extent_at(
 {
 	int				whichfork = XFS_DATA_FORK;
 	struct xfs_btree_cur		*cur = NULL;
-	struct xfs_bmbt_rec_host	*gotp;
 	struct xfs_bmbt_irec		got;
 	struct xfs_bmbt_irec		new; /* split extent */
 	struct xfs_mount		*mp = ip->i_mount;
@@ -6280,21 +6283,10 @@ xfs_bmap_split_extent_at(
 	}
 
 	/*
-	 * gotp can be null in 2 cases: 1) if there are no extents
-	 * or 2) split_fsb lies in a hole beyond which there are
-	 * no extents. Either way, we are done.
-	 */
-	gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
-	if (!gotp)
-		return 0;
-
-	xfs_bmbt_get_all(gotp, &got);
-
-	/*
-	 * Check split_fsb lies in a hole or the start boundary offset
-	 * of the extent.
+	 * If there are no extents, or split_fsb lies in a hole, we are done.
 	 */
-	if (got.br_startoff >= split_fsb)
+	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) ||
+	    got.br_startoff >= split_fsb)
 		return 0;
 
 	gotblkcnt = split_fsb - got.br_startoff;
@@ -6317,8 +6309,8 @@ xfs_bmap_split_extent_at(
 		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
 	}
 
-	xfs_bmbt_set_blockcount(gotp, gotblkcnt);
 	got.br_blockcount = gotblkcnt;
+	xfs_iext_update_extent(ifp, current_ext, &got);
 
 	logflags = XFS_ILOG_CORE;
 	if (cur) {
@@ -6402,7 +6394,7 @@ xfs_bmap_split_extent(
 	if (error)
 		goto out;
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out;
 
@@ -6452,7 +6444,7 @@ __xfs_bmap_add(
 	bi->bi_whichfork = whichfork;
 	bi->bi_bmap = *bmap;
 
-	error = xfs_defer_join(dfops, bi->bi_owner);
+	error = xfs_defer_ijoin(dfops, bi->bi_owner);
 	if (error) {
 		kmem_free(bi);
 		return error;

fs/xfs/libxfs/xfs_bmap_btree.c  (+1, -0)

@@ -858,6 +858,7 @@ xfs_bmbt_change_owner(
 	cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
 	if (!cur)
 		return -ENOMEM;
+	cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER;
 
 	error = xfs_btree_change_owner(cur, new_owner, buffer_list);
 	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);

fs/xfs/libxfs/xfs_btree.c  (+18, -9)

@@ -1791,6 +1791,7 @@ xfs_btree_lookup_get_block(
 
 	/* Check the inode owner since the verifiers don't. */
 	if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
+	    !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
 	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
 	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
 			cur->bc_private.b.ip->i_ino)
@@ -4451,10 +4452,15 @@ xfs_btree_block_change_owner(
 
 	/* modify the owner */
 	block = xfs_btree_get_block(cur, level, &bp);
-	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
+			return 0;
 		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
-	else
+	} else {
+		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
+			return 0;
 		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
+	}
 
 	/*
 	 * If the block is a root block hosted in an inode, we might not have a
@@ -4463,16 +4469,19 @@ xfs_btree_block_change_owner(
 	 * block is formatted into the on-disk inode fork. We still change it,
 	 * though, so everything is consistent in memory.
 	 */
-	if (bp) {
-		if (cur->bc_tp) {
-			xfs_trans_ordered_buf(cur->bc_tp, bp);
+	if (!bp) {
+		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+		ASSERT(level == cur->bc_nlevels - 1);
+		return 0;
+	}
+
+	if (cur->bc_tp) {
+		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
 			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
-		} else {
-			xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
+			return -EAGAIN;
 		}
 	} else {
-		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
-		ASSERT(level == cur->bc_nlevels - 1);
+		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
 	}
 
 	return 0;

fs/xfs/libxfs/xfs_btree.h  (+2, -1)

@@ -233,7 +233,8 @@ typedef struct xfs_btree_cur
 			short		forksize;	/* fork's inode space */
 			char		whichfork;	/* data or attr fork */
 			char		flags;		/* flags */
-#define	XFS_BTCUR_BPRV_WASDEL	1			/* was delayed */
+#define	XFS_BTCUR_BPRV_WASDEL		(1<<0)		/* was delayed */
+#define	XFS_BTCUR_BPRV_INVALID_OWNER	(1<<1)		/* for ext swap */
 		} b;
 	}		bc_private;	/* per-btree type data */
 } xfs_btree_cur_t;

fs/xfs/libxfs/xfs_defer.c  (+10, -19)

@@ -240,23 +240,19 @@ xfs_defer_trans_abort(
 STATIC int
 xfs_defer_trans_roll(
 	struct xfs_trans		**tp,
-	struct xfs_defer_ops		*dop,
-	struct xfs_inode		*ip)
+	struct xfs_defer_ops		*dop)
 {
 	int				i;
 	int				error;
 
-	/* Log all the joined inodes except the one we passed in. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
-		if (dop->dop_inodes[i] == ip)
-			continue;
+	/* Log all the joined inodes. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
-	}
 
 	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
 	/* Roll the transaction. */
-	error = xfs_trans_roll(tp, ip);
+	error = xfs_trans_roll(tp);
 	if (error) {
 		trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
 		xfs_defer_trans_abort(*tp, dop, error);
@@ -264,12 +260,9 @@ xfs_defer_trans_roll(
 	}
 	dop->dop_committed = true;
 
-	/* Rejoin the joined inodes except the one we passed in. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
-		if (dop->dop_inodes[i] == ip)
-			continue;
+	/* Rejoin the joined inodes. */
+	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
-	}
 
 	return error;
 }
@@ -284,11 +277,10 @@ xfs_defer_has_unfinished_work(
 
 /*
  * Add this inode to the deferred op.  Each joined inode is relogged
- * each time we roll the transaction, in addition to any inode passed
- * to xfs_defer_finish().
+ * each time we roll the transaction.
  */
 int
-xfs_defer_join(
+xfs_defer_ijoin(
 	struct xfs_defer_ops		*dop,
 	struct xfs_inode		*ip)
 {
@@ -317,8 +309,7 @@ xfs_defer_join(
 int
 xfs_defer_finish(
 	struct xfs_trans		**tp,
-	struct xfs_defer_ops		*dop,
-	struct xfs_inode		*ip)
+	struct xfs_defer_ops		*dop)
 {
 	struct xfs_defer_pending	*dfp;
 	struct list_head		*li;
@@ -337,7 +328,7 @@ xfs_defer_finish(
 		xfs_defer_intake_work(*tp, dop);
 
 		/* Roll the transaction. */
-		error = xfs_defer_trans_roll(tp, dop, ip);
+		error = xfs_defer_trans_roll(tp, dop);
 		if (error)
 			goto out;
 

fs/xfs/libxfs/xfs_defer.h  (+2, -3)

@@ -72,12 +72,11 @@ struct xfs_defer_ops {
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
-int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop,
-		struct xfs_inode *ip);
+int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
 void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
-int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
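
Note: the prototypes above capture the API change this merge rolls out across
the attr, remote-attr, and bmap code: callers no longer pass an inode to
xfs_defer_finish(); they attach it once with xfs_defer_ijoin() and it is
relogged and rejoined on every transaction roll. A sketch of the resulting
error-handling pattern, with example_ names as placeholders:

/*
 * Sketch of the common sequence after this change.  The mutation
 * callback stands in for xfs_attr_shortform_to_leaf(),
 * xfs_bmapi_write(), and friends.
 */
int example_make_changes(struct xfs_trans *tp, struct xfs_inode *ip,
			 struct xfs_defer_ops *dfops);	/* placeholder */

static int
example_defer_sequence(
	struct xfs_trans	**tpp,
	struct xfs_defer_ops	*dfops,
	struct xfs_inode	*ip,
	xfs_fsblock_t		*firstblock)
{
	int			error;

	xfs_defer_init(dfops, firstblock);
	error = example_make_changes(*tpp, ip, dfops);
	if (error)
		goto out_defer_cancel;
	xfs_defer_ijoin(dfops, ip);		/* relog ip on each roll */
	error = xfs_defer_finish(tpp, dfops);
	if (error)
		goto out_defer_cancel;
	return 0;

out_defer_cancel:
	xfs_defer_cancel(dfops);
	return error;
}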

fs/xfs/libxfs/xfs_ialloc.c  (+27, -30)

@@ -378,8 +378,6 @@ xfs_ialloc_inode_init(
 				 * transaction and pin the log appropriately.
 				 */
 				xfs_trans_ordered_buf(tp, fbuf);
-				xfs_trans_log_buf(tp, fbuf, 0,
-						  BBTOB(fbuf->b_length) - 1);
 			}
 		} else {
 			fbuf->b_flags |= XBF_DONE;
@@ -1133,6 +1131,7 @@ xfs_dialloc_ag_inobt(
 	int			error;
 	int			offset;
 	int			i, j;
+	int			searchdistance = 10;
 
 	pag = xfs_perag_get(mp, agno);
 
@@ -1159,7 +1158,6 @@ xfs_dialloc_ag_inobt(
 	if (pagno == agno) {
 		int		doneleft;	/* done, to the left */
 		int		doneright;	/* done, to the right */
-		int		searchdistance = 10;
 
 		error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
 		if (error)
@@ -1220,21 +1218,9 @@ xfs_dialloc_ag_inobt(
 		/*
 		 * Loop until we find an inode chunk with a free inode.
 		 */
-		while (!doneleft || !doneright) {
+		while (--searchdistance > 0 && (!doneleft || !doneright)) {
 			int	useleft;  /* using left inode chunk this time */
 
-			if (!--searchdistance) {
-				/*
-				 * Not in range - save last search
-				 * location and allocate a new inode
-				 */
-				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-				pag->pagl_leftrec = trec.ir_startino;
-				pag->pagl_rightrec = rec.ir_startino;
-				pag->pagl_pagino = pagino;
-				goto newino;
-			}
-
 			/* figure out the closer block if both are valid. */
 			if (!doneleft && !doneright) {
 				useleft = pagino -
@@ -1278,26 +1264,37 @@ xfs_dialloc_ag_inobt(
 				goto error1;
 		}
 
-		/*
-		 * We've reached the end of the btree. because
-		 * we are only searching a small chunk of the
-		 * btree each search, there is obviously free
-		 * inodes closer to the parent inode than we
-		 * are now. restart the search again.
-		 */
-		pag->pagl_pagino = NULLAGINO;
-		pag->pagl_leftrec = NULLAGINO;
-		pag->pagl_rightrec = NULLAGINO;
-		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-		goto restart_pagno;
+		if (searchdistance <= 0) {
+			/*
+			 * Not in range - save last search
+			 * location and allocate a new inode
+			 */
+			xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+			pag->pagl_leftrec = trec.ir_startino;
+			pag->pagl_rightrec = rec.ir_startino;
+			pag->pagl_pagino = pagino;
+
+		} else {
+			/*
+			 * We've reached the end of the btree. because
+			 * we are only searching a small chunk of the
+			 * btree each search, there is obviously free
+			 * inodes closer to the parent inode than we
+			 * are now. restart the search again.
+			 */
+			pag->pagl_pagino = NULLAGINO;
+			pag->pagl_leftrec = NULLAGINO;
+			pag->pagl_rightrec = NULLAGINO;
+			xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+			goto restart_pagno;
+		}
 	}
 
 	/*
 	 * In a different AG from the parent.
 	 * See if the most recently allocated block has any free.
 	 */
-newino:
 	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
 		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
 					 XFS_LOOKUP_EQ, &i);

fs/xfs/libxfs/xfs_inode_fork.c  (+15, -6)

@@ -1499,14 +1499,11 @@ xfs_iext_realloc_indirect(
 	xfs_ifork_t	*ifp,		/* inode fork pointer */
 	int		new_size)	/* new indirection array size */
 {
-	int		nlists;		/* number of irec's (ex lists) */
-	int		size;		/* current indirection array size */
-
 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	size = nlists * sizeof(xfs_ext_irec_t);
 	ASSERT(ifp->if_real_bytes);
-	ASSERT((new_size >= 0) && (new_size != size));
+	ASSERT((new_size >= 0) &&
+	       (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
+			     sizeof(xfs_ext_irec_t))));
 	if (new_size == 0) {
 		xfs_iext_destroy(ifp);
 	} else {
@@ -2023,3 +2020,15 @@ xfs_iext_get_extent(
 	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
 	return true;
 }
+
+void
+xfs_iext_update_extent(
+	struct xfs_ifork	*ifp,
+	xfs_extnum_t		idx,
+	struct xfs_bmbt_irec	*gotp)
+{
+	ASSERT(idx >= 0);
+	ASSERT(idx < xfs_iext_count(ifp));
+
+	xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp);
+}

fs/xfs/libxfs/xfs_inode_fork.h  (+2, -0)

@@ -187,6 +187,8 @@ bool		xfs_iext_lookup_extent(struct xfs_inode *ip,
 			xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp);
 bool		xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
 			struct xfs_bmbt_irec *gotp);
+void		xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
+			struct xfs_bmbt_irec *gotp);
 
 extern struct kmem_zone	*xfs_ifork_zone;
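
Note: xfs_iext_update_extent() completes the lookup/get/insert/update set of
iext helpers, letting callers work entirely on struct xfs_bmbt_irec copies
instead of poking xfs_bmbt_rec_host_t records through xfs_bmbt_set_*(). A
sketch of the pattern the xfs_bmap.c hunks above now follow (illustrative
function; assumes the caller holds the ilock and the extent list is loaded):

static void
example_trim_extent(
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		offset_fsb,
	xfs_filblks_t		new_len)
{
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		idx;

	/* find the extent at/after offset_fsb; bail if past the last one */
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return;

	/* modify the incore copy, then write it back in one step */
	got.br_blockcount = new_len;
	xfs_iext_update_extent(ifp, idx, &got);
}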
 

fs/xfs/libxfs/xfs_refcount.c  (+1, -1)

@@ -1679,7 +1679,7 @@ xfs_refcount_recover_cow_leftovers(
 		xfs_bmap_add_free(mp, &dfops, fsb,
 				rr->rr_rrec.rc_blockcount, NULL);
 
-		error = xfs_defer_finish(&tp, &dfops, NULL);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
 

fs/xfs/xfs_aops.c  (+47, -24)

@@ -85,11 +85,11 @@ xfs_find_bdev_for_inode(
  * associated buffer_heads, paying attention to the start and end offsets that
  * we need to process on the page.
  *
- * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
- * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
- * the page at all, as we may be racing with memory reclaim and it can free both
- * the bufferhead chain and the page as it will see the page as clean and
- * unused.
+ * Note that we open code the action in end_buffer_async_write here so that we
+ * only have to iterate over the buffers attached to the page once.  This is not
+ * only more efficient, but also ensures that we only call end_page_writeback
+ * at the end of the iteration, and thus avoids the pitfall of having the page
+ * and buffers potentially freed after every call to end_buffer_async_write.
  */
 static void
 xfs_finish_page_writeback(
@@ -97,29 +97,44 @@ xfs_finish_page_writeback(
 	struct bio_vec		*bvec,
 	int			error)
 {
-	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
-	struct buffer_head	*head, *bh, *next;
+	struct buffer_head	*head = page_buffers(bvec->bv_page), *bh = head;
+	bool			busy = false;
 	unsigned int		off = 0;
-	unsigned int		bsize;
+	unsigned long		flags;
 
 	ASSERT(bvec->bv_offset < PAGE_SIZE);
 	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
-	ASSERT(end < PAGE_SIZE);
+	ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
 	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
 
-	bh = head = page_buffers(bvec->bv_page);
-
-	bsize = bh->b_size;
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
 	do {
-		if (off > end)
-			break;
-		next = bh->b_this_page;
-		if (off < bvec->bv_offset)
-			goto next_bh;
-		bh->b_end_io(bh, !error);
-next_bh:
-		off += bsize;
-	} while ((bh = next) != head);
+		if (off >= bvec->bv_offset &&
+		    off < bvec->bv_offset + bvec->bv_len) {
+			ASSERT(buffer_async_write(bh));
+			ASSERT(bh->b_end_io == NULL);
+
+			if (error) {
+				mark_buffer_write_io_error(bh);
+				clear_buffer_uptodate(bh);
+				SetPageError(bvec->bv_page);
+			} else {
+				set_buffer_uptodate(bh);
+			}
+			clear_buffer_async_write(bh);
+			unlock_buffer(bh);
+		} else if (buffer_async_write(bh)) {
+			ASSERT(buffer_locked(bh));
+			busy = true;
+		}
+		off += bh->b_size;
+	} while ((bh = bh->b_this_page) != head);
+	bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+	local_irq_restore(flags);
+
+	if (!busy)
+		end_page_writeback(bvec->bv_page);
 }
 
 /*
@@ -133,8 +148,10 @@ xfs_destroy_ioend(
 	int			error)
 {
 	struct inode		*inode = ioend->io_inode;
-	struct bio		*last = ioend->io_bio;
-	struct bio		*bio, *next;
+	struct bio		*bio = &ioend->io_inline_bio;
+	struct bio		*last = ioend->io_bio, *next;
+	u64			start = bio->bi_iter.bi_sector;
+	bool			quiet = bio_flagged(bio, BIO_QUIET);
 
 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
 		struct bio_vec	*bvec;
@@ -155,6 +172,11 @@ xfs_destroy_ioend(
 
 		bio_put(bio);
 	}
+
+	if (unlikely(error && !quiet)) {
+		xfs_err_ratelimited(XFS_I(inode)->i_mount,
+			"writeback error on sector %llu", start);
+	}
 }
 
 /*
@@ -423,7 +445,8 @@ xfs_start_buffer_writeback(
 	ASSERT(!buffer_delay(bh));
 	ASSERT(!buffer_unwritten(bh));
 
-	mark_buffer_async_write(bh);
+	bh->b_end_io = NULL;
+	set_buffer_async_write(bh);
 	set_buffer_uptodate(bh);
 	clear_buffer_dirty(bh);
 }

fs/xfs/xfs_attr_inactive.c  (+3, -3)

@@ -97,7 +97,7 @@ xfs_attr3_leaf_freextent(
 			/*
 			 * Roll to next transaction.
 			 */
-			error = xfs_trans_roll(trans, dp);
+			error = xfs_trans_roll_inode(trans, dp);
 			if (error)
 				return error;
 		}
@@ -308,7 +308,7 @@ xfs_attr3_node_inactive(
 		/*
 		 * Atomically commit the whole invalidate stuff.
 		 */
-		error = xfs_trans_roll(trans, dp);
+		error = xfs_trans_roll_inode(trans, dp);
 		if (error)
 			return  error;
 	}
@@ -375,7 +375,7 @@ xfs_attr3_root_inactive(
 	/*
 	 * Commit the invalidate and start the next transaction.
 	 */
-	error = xfs_trans_roll(trans, dp);
+	error = xfs_trans_roll_inode(trans, dp);
 
 	return error;
 }

fs/xfs/xfs_bmap_item.c  (+1, -1)

@@ -502,7 +502,7 @@ xfs_bui_recover(
 	}
 
 	/* Finish transaction, free inodes. */
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto err_dfops;
 

fs/xfs/xfs_bmap_util.c  (+85, -39)

@@ -222,22 +222,21 @@ xfs_bmap_eof(
  * Count leaf blocks given a range of extent records.  Delayed allocation
  * extents are not counted towards the totals.
  */
-STATIC void
+xfs_extnum_t
 xfs_bmap_count_leaves(
 	struct xfs_ifork	*ifp,
-	xfs_extnum_t		*numrecs,
 	xfs_filblks_t		*count)
 {
-	xfs_extnum_t		i;
-	xfs_extnum_t		nr_exts = xfs_iext_count(ifp);
-
-	for (i = 0; i < nr_exts; i++) {
-		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
-		if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
-			(*numrecs)++;
-			*count += xfs_bmbt_get_blockcount(frp);
+	struct xfs_bmbt_irec	got;
+	xfs_extnum_t		numrecs = 0, i = 0;
+
+	while (xfs_iext_get_extent(ifp, i++, &got)) {
+		if (!isnullstartblock(got.br_startblock)) {
+			*count += got.br_blockcount;
+			numrecs++;
 		}
 	}
+	return numrecs;
 }
 
 /*
@@ -370,7 +369,7 @@ xfs_bmap_count_blocks(
 
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
 	case XFS_DINODE_FMT_EXTENTS:
-		xfs_bmap_count_leaves(ifp, nextents, count);
+		*nextents = xfs_bmap_count_leaves(ifp, count);
 		return 0;
 	case XFS_DINODE_FMT_BTREE:
 		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -1136,7 +1135,7 @@ xfs_alloc_file_space(
 		/*
 		 * Complete the transaction
 		 */
-		error = xfs_defer_finish(&tp, &dfops, NULL);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto error0;
 
@@ -1202,7 +1201,8 @@ xfs_unmap_extent(
 	if (error)
 		goto out_bmap_cancel;
 
-	error = xfs_defer_finish(&tp, &dfops, ip);
+	xfs_defer_ijoin(&dfops, ip);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -1496,7 +1496,7 @@ xfs_shift_file_space(
 		if (error)
 			goto out_bmap_cancel;
 
-		error = xfs_defer_finish(&tp, &dfops, NULL);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1777,7 +1777,8 @@ xfs_swap_extent_rmap(
 			if (error)
 				goto out_defer;
 
-			error = xfs_defer_finish(tpp, &dfops, ip);
+			xfs_defer_ijoin(&dfops, ip);
+			error = xfs_defer_finish(tpp, &dfops);
 			if (error)
 				goto out_defer;
 
@@ -1840,29 +1841,18 @@ xfs_swap_extent_forks(
 	}
 
 	/*
-	 * Before we've swapped the forks, lets set the owners of the forks
-	 * appropriately. We have to do this as we are demand paging the btree
-	 * buffers, and so the validation done on read will expect the owner
-	 * field to be correctly set. Once we change the owners, we can swap the
-	 * inode forks.
+	 * Btree format (v3) inodes have the inode number stamped in the bmbt
+	 * block headers. We can't start changing the bmbt blocks until the
+	 * inode owner change is logged so recovery does the right thing in the
+	 * event of a crash. Set the owner change log flags now and leave the
+	 * bmbt scan as the last step.
 	 */
 	if (ip->i_d.di_version == 3 &&
-	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
 		(*target_log_flags) |= XFS_ILOG_DOWNER;
-		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
-					      tip->i_ino, NULL);
-		if (error)
-			return error;
-	}
-
 	if (tip->i_d.di_version == 3 &&
-	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
 		(*src_log_flags) |= XFS_ILOG_DOWNER;
-		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
-					      ip->i_ino, NULL);
-		if (error)
-			return error;
-	}
 
 	/*
 	 * Swap the data forks of the inodes
@@ -1940,6 +1930,48 @@ xfs_swap_extent_forks(
 	return 0;
 }
 
+/*
+ * Fix up the owners of the bmbt blocks to refer to the current inode. The
+ * change owner scan attempts to order all modified buffers in the current
+ * transaction. In the event of ordered buffer failure, the offending buffer is
+ * physically logged as a fallback and the scan returns -EAGAIN. We must roll
+ * the transaction in this case to replenish the fallback log reservation and
+ * restart the scan. This process repeats until the scan completes.
+ */
+static int
+xfs_swap_change_owner(
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*ip,
+	struct xfs_inode	*tmpip)
+{
+	int			error;
+	struct xfs_trans	*tp = *tpp;
+
+	do {
+		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
+					      NULL);
+		/* success or fatal error */
+		if (error != -EAGAIN)
+			break;
+
+		error = xfs_trans_roll(tpp);
+		if (error)
+			break;
+		tp = *tpp;
+
+		/*
+		 * Redirty both inodes so they can relog and keep the log tail
+		 * moving forward.
+		 */
+		xfs_trans_ijoin(tp, ip, 0);
+		xfs_trans_ijoin(tp, tmpip, 0);
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
+	} while (true);
+
+	return error;
+}
+
 int
 xfs_swap_extents(
 	struct xfs_inode	*ip,	/* target inode */
@@ -1954,7 +1986,7 @@ xfs_swap_extents(
 	int			lock_flags;
 	struct xfs_ifork	*cowfp;
 	uint64_t		f;
-	int			resblks;
+	int			resblks = 0;
 
 	/*
 	 * Lock the inodes against other IO, page faults and truncate to
@@ -2002,11 +2034,8 @@ xfs_swap_extents(
 			  XFS_SWAP_RMAP_SPACE_RES(mp,
 				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
 				XFS_DATA_FORK);
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
-				0, 0, &tp);
-	} else
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
-				0, 0, &tp);
+	}
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
 
@@ -2091,6 +2120,23 @@ xfs_swap_extents(
 	xfs_trans_log_inode(tp, ip,  src_log_flags);
 	xfs_trans_log_inode(tp, tip, target_log_flags);
 
+	/*
+	 * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
+	 * have inode number owner values in the bmbt blocks that still refer to
+	 * the old inode. Scan each bmbt to fix up the owner values with the
+	 * inode number of the current inode.
+	 */
+	if (src_log_flags & XFS_ILOG_DOWNER) {
+		error = xfs_swap_change_owner(&tp, ip, tip);
+		if (error)
+			goto out_trans_cancel;
+	}
+	if (target_log_flags & XFS_ILOG_DOWNER) {
+		error = xfs_swap_change_owner(&tp, tip, ip);
+		if (error)
+			goto out_trans_cancel;
+	}
+
 	/*
 	 * If this is a synchronous mount, make sure that the
 	 * transaction goes to disk before returning to the user.

fs/xfs/xfs_bmap_util.h  (+1, -0)

@@ -70,6 +70,7 @@ int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
 
 xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
 
+xfs_extnum_t xfs_bmap_count_leaves(struct xfs_ifork *ifp, xfs_filblks_t *count);
 int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 			  int whichfork, xfs_extnum_t *nextents,
 			  xfs_filblks_t *count);
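
Note: xfs_bmap_count_leaves() is made non-static here (and now returns the
record count) so the quota code can call it directly instead of going through
the removed xfs_qm_get_rtblks() wrapper. A minimal sketch of such a caller;
the function name is illustrative and the extent list is assumed to be read in:

static xfs_filblks_t
example_count_data_blocks(
	struct xfs_inode	*ip)
{
	xfs_filblks_t		count = 0;

	/* counts real extents only; delalloc extents are skipped */
	xfs_bmap_count_leaves(XFS_IFORK_PTR(ip, XFS_DATA_FORK), &count);
	return count;
}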

fs/xfs/xfs_buf_item.c  (+90, -47)

@@ -29,6 +29,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
+#include "xfs_inode.h"
 
 
 kmem_zone_t	*xfs_buf_item_zone;
@@ -322,6 +323,8 @@ xfs_buf_item_format(
 	ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
 	       (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
 	        && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
+	ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) ||
+	       (bip->bli_flags & XFS_BLI_STALE));
 
 
 	/*
@@ -346,16 +349,6 @@ xfs_buf_item_format(
 		bip->bli_flags &= ~XFS_BLI_INODE_BUF;
 	}
 
-	if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) ==
-							XFS_BLI_ORDERED) {
-		/*
-		 * The buffer has been logged just to order it.  It is not being
-		 * included in the transaction commit, so don't format it.
-		 */
-		trace_xfs_buf_item_format_ordered(bip);
-		return;
-	}
-
 	for (i = 0; i < bip->bli_format_count; i++) {
 		xfs_buf_item_format_segment(bip, lv, &vecp, offset,
 					    &bip->bli_formats[i]);
@@ -574,26 +567,20 @@ xfs_buf_item_unlock(
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
-	bool			clean;
-	bool			aborted;
-	int			flags;
+	bool			aborted = !!(lip->li_flags & XFS_LI_ABORTED);
+	bool			hold = !!(bip->bli_flags & XFS_BLI_HOLD);
+	bool			dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
+#if defined(DEBUG) || defined(XFS_WARN)
+	bool			ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
+#endif
 
 	/* Clear the buffer's association with this transaction. */
 	bp->b_transp = NULL;
 
 	/*
-	 * If this is a transaction abort, don't return early.  Instead, allow
-	 * the brelse to happen.  Normally it would be done for stale
-	 * (cancelled) buffers at unpin time, but we'll never go through the
-	 * pin/unpin cycle if we abort inside commit.
+	 * The per-transaction state has been copied above so clear it from the
+	 * bli.
 	 */
-	aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;
-	/*
-	 * Before possibly freeing the buf item, copy the per-transaction state
-	 * so we can reference it safely later after clearing it from the
-	 * buffer log item.
-	 */
-	flags = bip->bli_flags;
 	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
 
 	/*
@@ -601,7 +588,7 @@ xfs_buf_item_unlock(
 	 * unlock the buffer and free the buf item when the buffer is unpinned
 	 * for the last time.
 	 */
-	if (flags & XFS_BLI_STALE) {
+	if (bip->bli_flags & XFS_BLI_STALE) {
 		trace_xfs_buf_item_unlock_stale(bip);
 		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
 		if (!aborted) {
@@ -619,20 +606,11 @@ xfs_buf_item_unlock(
 	 * regardless of whether it is dirty or not. A dirty abort implies a
 	 * shutdown, anyway.
 	 *
-	 * Ordered buffers are dirty but may have no recorded changes, so ensure
-	 * we only release clean items here.
+	 * The bli dirty state should match whether the blf has logged segments
+	 * except for ordered buffers, where only the bli should be dirty.
 	 */
-	clean = (flags & XFS_BLI_DIRTY) ? false : true;
-	if (clean) {
-		int i;
-		for (i = 0; i < bip->bli_format_count; i++) {
-			if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
-				     bip->bli_formats[i].blf_map_size)) {
-				clean = false;
-				break;
-			}
-		}
-	}
+	ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||
+	       (ordered && dirty && !xfs_buf_item_dirty_format(bip)));
 
 	/*
 	 * Clean buffers, by definition, cannot be in the AIL. However, aborted
@@ -651,11 +629,11 @@ xfs_buf_item_unlock(
 			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
 			xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
-		} else if (clean)
+		} else if (!dirty)
 			xfs_buf_item_relse(bp);
 	}
 
-	if (!(flags & XFS_BLI_HOLD))
+	if (!hold)
 		xfs_buf_relse(bp);
 }
 
@@ -945,14 +923,22 @@ xfs_buf_item_log(
 
 
 /*
- * Return 1 if the buffer has been logged or ordered in a transaction (at any
- * point, not just the current transaction) and 0 if not.
+ * Return true if the buffer has any ranges logged/dirtied by a transaction,
+ * false otherwise.
  */
-uint
-xfs_buf_item_dirty(
-	xfs_buf_log_item_t	*bip)
+bool
+xfs_buf_item_dirty_format(
+	struct xfs_buf_log_item	*bip)
 {
-	return (bip->bli_flags & XFS_BLI_DIRTY);
+	int			i;
+
+	for (i = 0; i < bip->bli_format_count; i++) {
+		if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
+			     bip->bli_formats[i].blf_map_size))
+			return true;
+	}
+
+	return false;
 }
 
 STATIC void
@@ -1054,6 +1040,31 @@ xfs_buf_do_callbacks(
 	}
 }
 
+/*
+ * Invoke the error state callback for each log item affected by the failed I/O.
+ *
+ * If a metadata buffer write fails with a non-permanent error, the buffer is
+ * eventually resubmitted and so the completion callbacks are not run. The error
+ * state may need to be propagated to the log items attached to the buffer,
+ * however, so the next AIL push of the item knows how to handle it correctly.
+ */
+STATIC void
+xfs_buf_do_callbacks_fail(
+	struct xfs_buf		*bp)
+{
+	struct xfs_log_item	*next;
+	struct xfs_log_item	*lip = bp->b_fspriv;
+	struct xfs_ail		*ailp = lip->li_ailp;
+
+	spin_lock(&ailp->xa_lock);
+	for (; lip; lip = next) {
+		next = lip->li_bio_list;
+		if (lip->li_ops->iop_error)
+			lip->li_ops->iop_error(lip, bp);
+	}
+	spin_unlock(&ailp->xa_lock);
+}
+
 static bool
 xfs_buf_iodone_callback_error(
 	struct xfs_buf		*bp)
@@ -1123,7 +1134,11 @@ xfs_buf_iodone_callback_error(
 	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
 		goto permanent_error;
 
-	/* still a transient error, higher layers will retry */
+	/*
+	 * Still a transient error, run IO completion failure callbacks and let
+	 * the higher layers retry the buffer.
+	 */
+	xfs_buf_do_callbacks_fail(bp);
 	xfs_buf_ioerror(bp, 0);
 	xfs_buf_relse(bp);
 	return true;
@@ -1204,3 +1219,31 @@ xfs_buf_iodone(
 	xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
 	xfs_buf_item_free(BUF_ITEM(lip));
 }
+
+/*
+ * Requeue a failed buffer for writeback
+ *
+ * Return true if the buffer has been re-queued properly, false otherwise
+ */
+bool
+xfs_buf_resubmit_failed_buffers(
+	struct xfs_buf		*bp,
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
+{
+	struct xfs_log_item	*next;
+
+	/*
+	 * Clear XFS_LI_FAILED flag from all items before resubmit
+	 *
+	 * XFS_LI_FAILED set/clear is protected by xa_lock; the caller of this
+	 * function must already have it acquired.
+	 */
+	for (; lip; lip = next) {
+		next = lip->li_bio_list;
+		xfs_clear_li_failed(lip);
+	}
+
+	/* Add this buffer back to the delayed write list */
+	return xfs_buf_delwri_queue(bp, buffer_list);
+}

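The resubmit path above walks the buffer's li_bio_list of attached log items, clears the failed state on each, and puts the buffer back on the delayed-write list. A hedged userspace sketch of that same walk, with invented types in place of the kernel ones:

/*
 * Illustrative sketch of what xfs_buf_resubmit_failed_buffers() does:
 * walk the buffer's intrusive singly linked list of log items, clear
 * the FAILED state on each, then requeue the buffer. The types and the
 * delwri queue stub are made up for the example.
 */
#include <stdbool.h>
#include <stdio.h>

#define LI_FAILED 0x4

struct log_item {
	unsigned int flags;
	struct log_item *bio_list;	/* next item attached to the buffer */
};

static bool delwri_queue(const char *bufname)
{
	printf("requeued %s for writeback\n", bufname);
	return true;
}

static bool resubmit_failed_buffer(const char *bufname, struct log_item *lip)
{
	struct log_item *next;

	for (; lip; lip = next) {
		next = lip->bio_list;
		lip->flags &= ~LI_FAILED;	/* xfs_clear_li_failed() */
	}
	return delwri_queue(bufname);
}

int main(void)
{
	struct log_item b = { LI_FAILED, NULL };
	struct log_item a = { LI_FAILED, &b };

	return resubmit_failed_buffer("bp", &a) ? 0 : 1;
}
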
+ 4 - 1
fs/xfs/xfs_buf_item.h

@@ -64,12 +64,15 @@ typedef struct xfs_buf_log_item {
 int	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
 void	xfs_buf_item_relse(struct xfs_buf *);
 void	xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
-uint	xfs_buf_item_dirty(xfs_buf_log_item_t *);
+bool	xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
 void	xfs_buf_attach_iodone(struct xfs_buf *,
 			      void(*)(struct xfs_buf *, xfs_log_item_t *),
 			      xfs_log_item_t *);
 void	xfs_buf_iodone_callbacks(struct xfs_buf *);
 void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
+bool	xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
+					struct xfs_log_item *,
+					struct list_head *);
 
 extern kmem_zone_t	*xfs_buf_item_zone;
 

+ 1 - 1
fs/xfs/xfs_dquot.c

@@ -383,7 +383,7 @@ xfs_qm_dqalloc(
 
 	xfs_trans_bhold(tp, bp);
 
-	error = xfs_defer_finish(tpp, &dfops, NULL);
+	error = xfs_defer_finish(tpp, &dfops);
 	if (error)
 		goto error1;
 

+ 3 - 0
fs/xfs/xfs_error.c

@@ -57,6 +57,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_AG_RESV_CRITICAL,
 	XFS_RANDOM_DROP_WRITES,
 	XFS_RANDOM_LOG_BAD_CRC,
+	XFS_RANDOM_LOG_ITEM_PIN,
 };
 
 struct xfs_errortag_attr {
@@ -161,6 +162,7 @@ XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
 XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
 XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
+XFS_ERRORTAG_ATTR_RW(log_item_pin,	XFS_ERRTAG_LOG_ITEM_PIN);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -193,6 +195,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
 	XFS_ERRORTAG_ATTR_LIST(drop_writes),
 	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+	XFS_ERRORTAG_ATTR_LIST(log_item_pin),
 	NULL,
 };
 

+ 3 - 1
fs/xfs/xfs_error.h

@@ -106,7 +106,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
  */
 #define XFS_ERRTAG_DROP_WRITES				28
 #define XFS_ERRTAG_LOG_BAD_CRC				29
-#define XFS_ERRTAG_MAX					30
+#define XFS_ERRTAG_LOG_ITEM_PIN				30
+#define XFS_ERRTAG_MAX					31
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -141,6 +142,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
 #define XFS_RANDOM_DROP_WRITES				1
 #define XFS_RANDOM_LOG_BAD_CRC				1
+#define XFS_RANDOM_LOG_ITEM_PIN				1
 
 #ifdef DEBUG
 extern int xfs_errortag_init(struct xfs_mount *mp);

+ 34 - 63
fs/xfs/xfs_file.c

@@ -1011,96 +1011,67 @@ xfs_file_llseek(
  *       page_lock (MM)
  *         i_lock (XFS - extent map serialisation)
  */
-
-/*
- * mmap()d file has taken write protection fault and is being made writable. We
- * can set the page state up correctly for a writable page, which means we can
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
- * mapping.
- */
-STATIC int
-xfs_filemap_page_mkwrite(
-	struct vm_fault		*vmf)
+static int
+__xfs_filemap_fault(
+	struct vm_fault		*vmf,
+	enum page_entry_size	pe_size,
+	bool			write_fault)
 {
 	struct inode		*inode = file_inode(vmf->vma->vm_file);
+	struct xfs_inode	*ip = XFS_I(inode);
 	int			ret;
 
-	trace_xfs_filemap_page_mkwrite(XFS_I(inode));
+	trace_xfs_filemap_fault(ip, pe_size, write_fault);
 
-	sb_start_pagefault(inode->i_sb);
-	file_update_time(vmf->vma->vm_file);
-	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+	if (write_fault) {
+		sb_start_pagefault(inode->i_sb);
+		file_update_time(vmf->vma->vm_file);
+	}
 
+	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 	if (IS_DAX(inode)) {
-		ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
+		ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
 	} else {
-		ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
-		ret = block_page_mkwrite_return(ret);
+		if (write_fault)
+			ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
+		else
+			ret = filemap_fault(vmf);
 	}
-
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	sb_end_pagefault(inode->i_sb);
 
+	if (write_fault)
+		sb_end_pagefault(inode->i_sb);
 	return ret;
 }
 
-STATIC int
+static int
 xfs_filemap_fault(
 	struct vm_fault		*vmf)
 {
-	struct inode		*inode = file_inode(vmf->vma->vm_file);
-	int			ret;
-
-	trace_xfs_filemap_fault(XFS_I(inode));
-
 	/* DAX can shortcut the normal fault path on write faults! */
-	if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
-		return xfs_filemap_page_mkwrite(vmf);
-
-	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	if (IS_DAX(inode))
-		ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
-	else
-		ret = filemap_fault(vmf);
-	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-
-	return ret;
+	return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
+			IS_DAX(file_inode(vmf->vma->vm_file)) &&
+			(vmf->flags & FAULT_FLAG_WRITE));
 }
 
-/*
- * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
- * both read and write faults. Hence we need to handle both cases. There is no
- * ->huge_mkwrite callout for huge pages, so we have a single function here to
- * handle both cases here. @flags carries the information on the type of fault
- * occuring.
- */
-STATIC int
+static int
 xfs_filemap_huge_fault(
 	struct vm_fault		*vmf,
 	enum page_entry_size	pe_size)
 {
-	struct inode		*inode = file_inode(vmf->vma->vm_file);
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			ret;
-
-	if (!IS_DAX(inode))
+	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
 		return VM_FAULT_FALLBACK;
 
-	trace_xfs_filemap_huge_fault(ip);
-
-	if (vmf->flags & FAULT_FLAG_WRITE) {
-		sb_start_pagefault(inode->i_sb);
-		file_update_time(vmf->vma->vm_file);
-	}
-
-	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
-	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-
-	if (vmf->flags & FAULT_FLAG_WRITE)
-		sb_end_pagefault(inode->i_sb);
+	/* DAX can shortcut the normal fault path on write faults! */
+	return __xfs_filemap_fault(vmf, pe_size,
+			(vmf->flags & FAULT_FLAG_WRITE));
+}
 
-	return ret;
+static int
+xfs_filemap_page_mkwrite(
+	struct vm_fault		*vmf)
+{
+	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
 }
 
 /*

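The consolidation above funnels xfs_filemap_fault(), xfs_filemap_huge_fault() and xfs_filemap_page_mkwrite() into one helper keyed on a write_fault flag, so the pagefault accounting and MMAPLOCK handling appear exactly once. A toy sketch of that dispatch shape, with stubs standing in for the VFS machinery:

/*
 * Minimal model of the fault-handler consolidation: three entry points
 * funnel into one helper that takes a write_fault flag. Non-DAX write
 * faults go through the read path first and are made writable later
 * via the mkwrite path, just as in the XFS code above.
 */
#include <stdbool.h>
#include <stdio.h>

enum pe_size { PTE, PMD };

static int common_fault(enum pe_size sz, bool write_fault)
{
	if (write_fault)
		puts("start pagefault accounting, update file time");
	puts("take mmap lock shared");
	printf("service %s %s fault\n", sz == PTE ? "PTE" : "PMD",
	       write_fault ? "write" : "read");
	puts("drop mmap lock shared");
	if (write_fault)
		puts("end pagefault accounting");
	return 0;
}

static int fault(bool dax, bool write)
{
	/* DAX write faults shortcut straight to the mkwrite path */
	return common_fault(PTE, dax && write);
}

static int page_mkwrite(void)
{
	return common_fault(PTE, true);
}

int main(void)
{
	fault(false, true);	/* non-DAX write fault: read-fault path */
	page_mkwrite();		/* later made writable via mkwrite */
	return 0;
}
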
+ 5 - 5
fs/xfs/xfs_icache.c

@@ -1124,11 +1124,11 @@ reclaim:
 	 * Because we use RCU freeing we need to ensure the inode always appears
 	 * to be reclaimed with an invalid inode number when in the free state.
 	 * We do this as early as possible under the ILOCK so that
-	 * xfs_iflush_cluster() can be guaranteed to detect races with us here.
-	 * By doing this, we guarantee that once xfs_iflush_cluster has locked
-	 * XFS_ILOCK that it will see either a valid, flushable inode that will
-	 * serialise correctly, or it will see a clean (and invalid) inode that
-	 * it can skip.
+	 * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
+	 * detect races with us here. By doing this, we guarantee that once
+	 * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
+	 * it will see either a valid inode that will serialise correctly, or it
+	 * will see an invalid inode that it can skip.
 	 */
 	spin_lock(&ip->i_flags_lock);
 	ip->i_flags = XFS_IRECLAIM;

+ 27 - 13
fs/xfs/xfs_inode.c

@@ -1055,7 +1055,7 @@ xfs_dir_ialloc(
 			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
 		}
 
-		code = xfs_trans_roll(&tp, NULL);
+		code = xfs_trans_roll(&tp);
 		if (committed != NULL)
 			*committed = 1;
 
@@ -1285,7 +1285,7 @@ xfs_create(
 	 */
 	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -1513,7 +1513,7 @@ xfs_link(
 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error) {
 		xfs_defer_cancel(&dfops);
 		goto error_return;
@@ -1607,11 +1607,12 @@ xfs_itruncate_extents(
 		 * Duplicate the transaction that has the permanent
 		 * reservation and commit the old transaction.
 		 */
-		error = xfs_defer_finish(&tp, &dfops, ip);
+		xfs_defer_ijoin(&dfops, ip);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_bmap_cancel;
 
-		error = xfs_trans_roll(&tp, ip);
+		error = xfs_trans_roll_inode(&tp, ip);
 		if (error)
 			goto out;
 	}
@@ -1855,7 +1856,7 @@ xfs_inactive_ifree(
 	 * Just ignore errors at this point.  There is nothing we can do except
 	 * to try to keep going. Make sure it's not a silent error.
 	 */
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error) {
 		xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
 			__func__, error);
@@ -2359,11 +2360,24 @@ retry:
 			 * already marked stale. If we can't lock it, back off
 			 * and retry.
 			 */
-			if (ip != free_ip &&
-			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-				rcu_read_unlock();
-				delay(1);
-				goto retry;
+			if (ip != free_ip) {
+				if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+					rcu_read_unlock();
+					delay(1);
+					goto retry;
+				}
+
+				/*
+				 * Check the inode number again in case we're
+				 * racing with freeing in xfs_reclaim_inode().
+				 * See the comments in that function for more
+				 * information as to why the initial check is
+				 * not sufficient.
+				 */
+				if (ip->i_ino != inum + i) {
+					xfs_iunlock(ip, XFS_ILOCK_EXCL);
+					continue;
+				}
 			}
 			rcu_read_unlock();
 
@@ -2637,7 +2651,7 @@ xfs_remove(
 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -2723,7 +2737,7 @@ xfs_finish_rename(
 	if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, dfops, NULL);
+	error = xfs_defer_finish(&tp, dfops);
 	if (error) {
 		xfs_defer_cancel(dfops);
 		xfs_trans_cancel(tp);

+ 43 - 4
fs/xfs/xfs_inode_item.c

@@ -27,6 +27,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
 #include "xfs_log.h"
 
 
@@ -475,6 +476,23 @@ xfs_inode_item_unpin(
 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
 }
 
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * This informs the AIL that the inode is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_inode_item_error(
+	struct xfs_log_item	*lip,
+	struct xfs_buf		*bp)
+{
+	ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
+	xfs_set_li_failed(lip, bp);
+}
+
 STATIC uint
 xfs_inode_item_push(
 	struct xfs_log_item	*lip,
@@ -484,13 +502,28 @@ xfs_inode_item_push(
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
-	struct xfs_buf		*bp = NULL;
+	struct xfs_buf		*bp = lip->li_buf;
 	uint			rval = XFS_ITEM_SUCCESS;
 	int			error;
 
 	if (xfs_ipincount(ip) > 0)
 		return XFS_ITEM_PINNED;
 
+	/*
+	 * The buffer containing this item failed to be written back
+	 * previously. Resubmit the buffer for IO.
+	 */
+	if (lip->li_flags & XFS_LI_FAILED) {
+		if (!xfs_buf_trylock(bp))
+			return XFS_ITEM_LOCKED;
+
+		if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+			rval = XFS_ITEM_FLUSHING;
+
+		xfs_buf_unlock(bp);
+		return rval;
+	}
+
 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
 		return XFS_ITEM_LOCKED;
 
@@ -622,7 +655,8 @@ static const struct xfs_item_ops xfs_inode_item_ops = {
 	.iop_unlock	= xfs_inode_item_unlock,
 	.iop_committed	= xfs_inode_item_committed,
 	.iop_push	= xfs_inode_item_push,
-	.iop_committing = xfs_inode_item_committing
+	.iop_committing = xfs_inode_item_committing,
+	.iop_error	= xfs_inode_item_error
 };
 
 
@@ -710,7 +744,8 @@ xfs_iflush_done(
 		 * the AIL lock.
 		 */
 		iip = INODE_ITEM(blip);
-		if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
+		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
+		    lip->li_flags & XFS_LI_FAILED)
 			need_ail++;
 
 		blip = next;
@@ -718,7 +753,8 @@ xfs_iflush_done(
 
 	/* make sure we capture the state of the initial inode. */
 	iip = INODE_ITEM(lip);
-	if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
+	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
+	    lip->li_flags & XFS_LI_FAILED)
 		need_ail++;
 
 	/*
@@ -739,6 +775,9 @@ xfs_iflush_done(
 			if (INODE_ITEM(blip)->ili_logged &&
 			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
 				mlip_changed |= xfs_ail_delete_one(ailp, blip);
+			else {
+				xfs_clear_li_failed(blip);
+			}
 		}
 
 		if (mlip_changed) {

+ 27 - 14
fs/xfs/xfs_ioctl.c

@@ -931,16 +931,15 @@ xfs_ioc_fsgetxattr(
 	return 0;
 }
 
-STATIC void
-xfs_set_diflags(
+STATIC uint16_t
+xfs_flags2diflags(
 	struct xfs_inode	*ip,
 	unsigned int		xflags)
 {
-	unsigned int		di_flags;
-	uint64_t		di_flags2;
-
 	/* can't set PREALLOC this way, just preserve it */
-	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+	uint16_t		di_flags =
+		(ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+
 	if (xflags & FS_XFLAG_IMMUTABLE)
 		di_flags |= XFS_DIFLAG_IMMUTABLE;
 	if (xflags & FS_XFLAG_APPEND)
@@ -970,19 +969,24 @@ xfs_set_diflags(
 		if (xflags & FS_XFLAG_EXTSIZE)
 			di_flags |= XFS_DIFLAG_EXTSIZE;
 	}
-	ip->i_d.di_flags = di_flags;
 
-	/* diflags2 only valid for v3 inodes. */
-	if (ip->i_d.di_version < 3)
-		return;
+	return di_flags;
+}
+
+STATIC uint64_t
+xfs_flags2diflags2(
+	struct xfs_inode	*ip,
+	unsigned int		xflags)
+{
+	uint64_t		di_flags2 =
+		(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
 
-	di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
 	if (xflags & FS_XFLAG_DAX)
 		di_flags2 |= XFS_DIFLAG2_DAX;
 	if (xflags & FS_XFLAG_COWEXTSIZE)
 		di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 
-	ip->i_d.di_flags2 = di_flags2;
+	return di_flags2;
 }
 
 STATIC void
@@ -1008,11 +1012,12 @@ xfs_diflags_to_linux(
 		inode->i_flags |= S_NOATIME;
 	else
 		inode->i_flags &= ~S_NOATIME;
+#if 0	/* disabled until the flag switching races are sorted out */
 	if (xflags & FS_XFLAG_DAX)
 		inode->i_flags |= S_DAX;
 	else
 		inode->i_flags &= ~S_DAX;
-
+#endif
 }
 
 static int
@@ -1022,6 +1027,7 @@ xfs_ioctl_setattr_xflags(
 	struct fsxattr		*fa)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	uint64_t		di_flags2;
 
 	/* Can't change realtime flag if any extents are allocated. */
 	if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -1052,7 +1058,14 @@ xfs_ioctl_setattr_xflags(
 	    !capable(CAP_LINUX_IMMUTABLE))
 		return -EPERM;
 
-	xfs_set_diflags(ip, fa->fsx_xflags);
+	/* diflags2 only valid for v3 inodes. */
+	di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
+	if (di_flags2 && ip->i_d.di_version < 3)
+		return -EINVAL;
+
+	ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags);
+	ip->i_d.di_flags2 = di_flags2;
+
 	xfs_diflags_to_linux(ip);
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

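The xflags change above computes the prospective di_flags2 word up front and fails with -EINVAL when it is nonzero on a pre-v3 inode, rather than silently dropping the flags as the old xfs_set_diflags() did. A simplified sketch of that guard, using made-up flag values and a stand-in inode struct:

/*
 * Sketch of the v2-inode guard: translate the user-visible xflags into
 * the would-be di_flags2 word first, and reject the whole operation if
 * any bit would land on an inode that predates v3.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define XFLAG_DAX	0x1
#define XFLAG_COWEXT	0x2

struct fake_inode { int version; uint64_t di_flags2; };

static uint64_t flags2diflags2(unsigned int xflags)
{
	uint64_t f2 = 0;

	if (xflags & XFLAG_DAX)
		f2 |= XFLAG_DAX;
	if (xflags & XFLAG_COWEXT)
		f2 |= XFLAG_COWEXT;
	return f2;
}

static int setattr_xflags(struct fake_inode *ip, unsigned int xflags)
{
	uint64_t di_flags2 = flags2diflags2(xflags);

	if (di_flags2 && ip->version < 3)
		return -EINVAL;	/* di_flags2 only exists on v3 inodes */
	ip->di_flags2 = di_flags2;
	return 0;
}

int main(void)
{
	struct fake_inode v2 = { .version = 2 };

	printf("v2 + DAX -> %d (expect %d)\n",
	       setattr_xflags(&v2, XFLAG_DAX), -EINVAL);
	return 0;
}
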
+ 4 - 6
fs/xfs/xfs_iomap.c

@@ -274,7 +274,7 @@ xfs_iomap_write_direct(
 	/*
 	 * Complete the transaction
 	 */
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -520,7 +520,6 @@ xfs_file_iomap_begin_delay(
 	struct inode		*inode,
 	loff_t			offset,
 	loff_t			count,
-	unsigned		flags,
 	struct iomap		*iomap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -784,7 +783,7 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto trans_cancel;
 
-			error = xfs_defer_finish(&tp, &dfops, NULL);
+			error = xfs_defer_finish(&tp, &dfops);
 			if (error)
 				goto trans_cancel;
 
@@ -906,7 +905,7 @@ xfs_iomap_write_unwritten(
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		}
 
-		error = xfs_defer_finish(&tp, &dfops, NULL);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto error_on_bmapi_transaction;
 
@@ -984,8 +983,7 @@ xfs_file_iomap_begin(
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
-		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
-				iomap);
+		return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
 	}
 
 	if (need_excl_ilock(ip, flags)) {

+ 1 - 1
fs/xfs/xfs_iops.c

@@ -817,7 +817,7 @@ xfs_vn_setattr_nonsize(
  * Caution: The caller of this function is responsible for calling
  * setattr_prepare() or otherwise verifying the change is fine.
  */
-int
+STATIC int
 xfs_setattr_size(
 	struct xfs_inode	*ip,
 	struct iattr		*iattr)

+ 24 - 9
fs/xfs/xfs_log.c

@@ -743,10 +743,14 @@ xfs_log_mount_finish(
 	struct xfs_mount	*mp)
 {
 	int	error = 0;
+	bool	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
 	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
 		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
 		return 0;
+	} else if (readonly) {
+		/* Allow unlinked processing to proceed */
+		mp->m_flags &= ~XFS_MOUNT_RDONLY;
 	}
 
 	/*
@@ -757,12 +761,27 @@ xfs_log_mount_finish(
 	 * inodes.  Turn it off immediately after recovery finishes
 	 * so that we don't leak the quota inodes if subsequent mount
 	 * activities fail.
+	 *
+	 * We let all inodes involved in redo item processing end up on
+	 * the LRU instead of being evicted immediately so that if we do
+	 * something to an unlinked inode, the irele won't cause
+	 * premature truncation and freeing of the inode, which results
+	 * in log recovery failure.  We have to evict the unreferenced
+	 * lru inodes after clearing MS_ACTIVE because we don't
+	 * otherwise clean up the lru if there's a subsequent failure in
+	 * xfs_mountfs, which leads to us leaking the inodes if nothing
+	 * else (e.g. quotacheck) references the inodes before the
+	 * mount failure occurs.
 	 */
 	mp->m_super->s_flags |= MS_ACTIVE;
 	error = xlog_recover_finish(mp->m_log);
 	if (!error)
 		xfs_log_work_queue(mp);
 	mp->m_super->s_flags &= ~MS_ACTIVE;
+	evict_inodes(mp->m_super);
+
+	if (readonly)
+		mp->m_flags |= XFS_MOUNT_RDONLY;
 
 	return error;
 }
@@ -812,11 +831,14 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	int		 error;
 
 	/*
-	 * Don't write out unmount record on read-only mounts.
+	 * Don't write out unmount record on norecovery mounts or ro devices.
 	 * Or, if we are doing a forced umount (typically because of IO errors).
 	 */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
+	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
+	    xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
 		return 0;
+	}
 
 	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
 	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
@@ -3353,8 +3375,6 @@ maybe_sleep:
 		 */
 		if (iclog->ic_state & XLOG_STATE_IOERROR)
 			return -EIO;
-		if (log_flushed)
-			*log_flushed = 1;
 	} else {
 
 no_sleep:
@@ -3458,8 +3478,6 @@ try_again:
 
 				xlog_wait(&iclog->ic_prev->ic_write_wait,
 							&log->l_icloglock);
-				if (log_flushed)
-					*log_flushed = 1;
 				already_slept = 1;
 				goto try_again;
 			}
@@ -3493,9 +3511,6 @@ try_again:
 			 */
 			if (iclog->ic_state & XLOG_STATE_IOERROR)
 				return -EIO;
-
-			if (log_flushed)
-				*log_flushed = 1;
 		} else {		/* just return */
 			spin_unlock(&log->l_icloglock);
 		}

+ 101 - 60
fs/xfs/xfs_log_recover.c

@@ -1029,61 +1029,106 @@ out_error:
 }
 
 /*
- * Check the log tail for torn writes. This is required when torn writes are
- * detected at the head and the head had to be walked back to a previous record.
- * The tail of the previous record must now be verified to ensure the torn
- * writes didn't corrupt the previous tail.
+ * Calculate distance from head to tail (i.e., unused space in the log).
+ */
+static inline int
+xlog_tail_distance(
+	struct xlog	*log,
+	xfs_daddr_t	head_blk,
+	xfs_daddr_t	tail_blk)
+{
+	if (head_blk < tail_blk)
+		return tail_blk - head_blk;
+
+	return tail_blk + (log->l_logBBsize - head_blk);
+}
+
+/*
+ * Verify the log tail. This is particularly important when torn or incomplete
+ * writes have been detected near the front of the log and the head has been
+ * walked back accordingly.
+ *
+ * We also have to handle the case where the tail was pinned and the head
+ * blocked behind the tail right before a crash. If the tail had been pushed
+ * immediately prior to the crash and the subsequent checkpoint was only
+ * partially written, it's possible it overwrote the last referenced tail in the
+ * log with garbage. This is not a coherency problem because the tail must have
+ * been pushed before it can be overwritten, but appears as log corruption to
+ * recovery because we have no way to know the tail was updated if the
+ * subsequent checkpoint didn't write successfully.
  *
- * Return an error if CRC verification fails as recovery cannot proceed.
+ * Therefore, CRC check the log from tail to head. If a failure occurs and the
+ * offending record is within max iclog bufs from the head, walk the tail
+ * forward and retry until a valid tail is found or corruption is detected out
+ * of the range of a possible overwrite.
  */
 STATIC int
 xlog_verify_tail(
 	struct xlog		*log,
 	xfs_daddr_t		head_blk,
-	xfs_daddr_t		tail_blk)
+	xfs_daddr_t		*tail_blk,
+	int			hsize)
 {
 	struct xlog_rec_header	*thead;
 	struct xfs_buf		*bp;
 	xfs_daddr_t		first_bad;
-	int			count;
 	int			error = 0;
 	bool			wrapped;
-	xfs_daddr_t		tmp_head;
+	xfs_daddr_t		tmp_tail;
+	xfs_daddr_t		orig_tail = *tail_blk;
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
 		return -ENOMEM;
 
 	/*
-	 * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
-	 * a temporary head block that points after the last possible
-	 * concurrently written record of the tail.
+	 * Make sure the tail points to a record (returns positive count on
+	 * success).
 	 */
-	count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
-				     XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
-				     &wrapped);
-	if (count < 0) {
-		error = count;
+	error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+			&tmp_tail, &thead, &wrapped);
+	if (error < 0)
 		goto out;
-	}
-
-	/*
-	 * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
-	 * into the actual log head. tmp_head points to the start of the record
-	 * so update it to the actual head block.
-	 */
-	if (count < XLOG_MAX_ICLOGS + 1)
-		tmp_head = head_blk;
+	if (*tail_blk != tmp_tail)
+		*tail_blk = tmp_tail;
 
 	/*
-	 * We now have a tail and temporary head block that covers at least
-	 * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
-	 * records were completely written. Run a CRC verification pass from
-	 * tail to head and return the result.
+	 * Run a CRC check from the tail to the head. We can't just check
+	 * MAX_ICLOGS records past the tail because the tail may point to stale
+	 * blocks cleared during the search for the head/tail. These blocks are
+	 * overwritten with zero-length records and thus record count is not a
+	 * reliable indicator of the iclog state before a crash.
 	 */
-	error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+	first_bad = 0;
+	error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
 				      XLOG_RECOVER_CRCPASS, &first_bad);
+	while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
+		int	tail_distance;
+
+		/*
+		 * Is corruption within range of the head? If so, retry from
+		 * the next record. Otherwise return an error.
+		 */
+		tail_distance = xlog_tail_distance(log, head_blk, first_bad);
+		if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize))
+			break;
 
+		/* skip to the next record; returns positive count on success */
+		error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
+				&tmp_tail, &thead, &wrapped);
+		if (error < 0)
+			goto out;
+
+		*tail_blk = tmp_tail;
+		first_bad = 0;
+		error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
+					      XLOG_RECOVER_CRCPASS, &first_bad);
+	}
+
+	if (!error && *tail_blk != orig_tail)
+		xfs_warn(log->l_mp,
+		"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
+			 orig_tail, *tail_blk);
 out:
 	xlog_put_bp(bp);
 	return error;
@@ -1143,7 +1188,7 @@ xlog_verify_head(
 	 */
 	error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
 				      XLOG_RECOVER_CRCPASS, &first_bad);
-	if (error == -EFSBADCRC) {
+	if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
 		/*
 		 * We've hit a potential torn write. Reset the error and warn
 		 * about it.
@@ -1183,31 +1228,12 @@ xlog_verify_head(
 			ASSERT(0);
 			return 0;
 		}
-
-		/*
-		 * Now verify the tail based on the updated head. This is
-		 * required because the torn writes trimmed from the head could
-		 * have been written over the tail of a previous record. Return
-		 * any errors since recovery cannot proceed if the tail is
-		 * corrupt.
-		 *
-		 * XXX: This leaves a gap in truly robust protection from torn
-		 * writes in the log. If the head is behind the tail, the tail
-		 * pushes forward to create some space and then a crash occurs
-		 * causing the writes into the previous record's tail region to
-		 * tear, log recovery isn't able to recover.
-		 *
-		 * How likely is this to occur? If possible, can we do something
-		 * more intelligent here? Is it safe to push the tail forward if
-		 * we can determine that the tail is within the range of the
-		 * torn write (e.g., the kernel can only overwrite the tail if
-		 * it has actually been pushed forward)? Alternatively, could we
-		 * somehow prevent this condition at runtime?
-		 */
-		error = xlog_verify_tail(log, *head_blk, *tail_blk);
 	}
+	if (error)
+		return error;
 
-	return error;
+	return xlog_verify_tail(log, *head_blk, tail_blk,
+				be32_to_cpu((*rhead)->h_size));
 }
 
 /*
@@ -4801,12 +4827,16 @@ xlog_recover_process_intents(
 	int			error = 0;
 	struct xfs_ail_cursor	cur;
 	struct xfs_ail		*ailp;
+#if defined(DEBUG) || defined(XFS_WARN)
 	xfs_lsn_t		last_lsn;
+#endif
 
 	ailp = log->l_ailp;
 	spin_lock(&ailp->xa_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+#if defined(DEBUG) || defined(XFS_WARN)
 	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
+#endif
 	while (lip != NULL) {
 		/*
 		 * We're done when we see something other than an intent.
@@ -5218,7 +5248,7 @@ xlog_do_recovery_pass(
 	xfs_daddr_t		*first_bad)	/* out: first bad log rec */
 {
 	xlog_rec_header_t	*rhead;
-	xfs_daddr_t		blk_no;
+	xfs_daddr_t		blk_no, rblk_no;
 	xfs_daddr_t		rhead_blk;
 	char			*offset;
 	xfs_buf_t		*hbp, *dbp;
@@ -5231,7 +5261,7 @@ xlog_do_recovery_pass(
 	LIST_HEAD		(buffer_list);
 
 	ASSERT(head_blk != tail_blk);
-	rhead_blk = 0;
+	blk_no = rhead_blk = tail_blk;
 
 	for (i = 0; i < XLOG_RHASH_SIZE; i++)
 		INIT_HLIST_HEAD(&rhash[i]);
@@ -5309,7 +5339,6 @@ xlog_do_recovery_pass(
 	}
 
 	memset(rhash, 0, sizeof(rhash));
-	blk_no = rhead_blk = tail_blk;
 	if (tail_blk > head_blk) {
 		/*
 		 * Perform recovery around the end of the physical log.
@@ -5371,9 +5400,19 @@ xlog_do_recovery_pass(
 			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
 			blk_no += hblks;
 
-			/* Read in data for log record */
-			if (blk_no + bblks <= log->l_logBBsize) {
-				error = xlog_bread(log, blk_no, bblks, dbp,
+			/*
+			 * Read the log record data in multiple reads if it
+			 * wraps around the end of the log. Note that if the
+			 * header already wrapped, blk_no could point past the
+			 * end of the log. The record data is contiguous in
+			 * that case.
+			 */
+			if (blk_no + bblks <= log->l_logBBsize ||
+			    blk_no >= log->l_logBBsize) {
+				/* mod blk_no in case the header wrapped and
+				 * pushed it beyond the end of the log */
+				rblk_no = do_mod(blk_no, log->l_logBBsize);
+				error = xlog_bread(log, rblk_no, bblks, dbp,
 						   &offset);
 				if (error)
 					goto bread_err2;
@@ -5563,6 +5602,8 @@ xlog_do_recover(
 	xfs_buf_t	*bp;
 	xfs_sb_t	*sbp;
 
+	trace_xfs_log_recover(log, head_blk, tail_blk);
+
 	/*
 	 * First replay the images in the log.
 	 */

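Two pieces of circular-log arithmetic carry the hunks above: xlog_tail_distance() measures the free space between head and tail modulo the log size, and the recovery-pass change mods blk_no back into range when a wrapped record header pushed it past the physical end. A small standalone illustration; the log size is an arbitrary example value and do_mod() is shown as a plain % here:

/*
 * Circular-log arithmetic sketch: distance from head to tail, and
 * wrapping a block number that ran past the end of the log.
 */
#include <stdio.h>

#define LOG_BBSIZE 1000		/* log size in basic blocks (example) */

static int tail_distance(int head_blk, int tail_blk)
{
	if (head_blk < tail_blk)
		return tail_blk - head_blk;
	return tail_blk + (LOG_BBSIZE - head_blk);
}

int main(void)
{
	/* head behind tail: simple difference */
	printf("head=100 tail=400 -> %d\n", tail_distance(100, 400));
	/* head past tail: distance wraps through the end of the log */
	printf("head=900 tail=50  -> %d\n", tail_distance(900, 50));
	/* record header near the end pushed blk_no past the log end */
	int blk_no = LOG_BBSIZE + 7;
	printf("blk_no=%d reads from %d\n", blk_no, blk_no % LOG_BBSIZE);
	return 0;
}
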
+ 12 - 32
fs/xfs/xfs_qm.c

@@ -31,6 +31,7 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_bmap_util.h"
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
 #include "xfs_qm.h"
@@ -1120,31 +1121,6 @@ xfs_qm_quotacheck_dqadjust(
 	return 0;
 }
 
-STATIC int
-xfs_qm_get_rtblks(
-	xfs_inode_t	*ip,
-	xfs_qcnt_t	*O_rtblks)
-{
-	xfs_filblks_t	rtblks;			/* total rt blks */
-	xfs_extnum_t	idx;			/* extent record index */
-	xfs_ifork_t	*ifp;			/* inode fork pointer */
-	xfs_extnum_t	nextents;		/* number of extent entries */
-	int		error;
-
-	ASSERT(XFS_IS_REALTIME_INODE(ip));
-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-			return error;
-	}
-	rtblks = 0;
-	nextents = xfs_iext_count(ifp);
-	for (idx = 0; idx < nextents; idx++)
-		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
-	*O_rtblks = (xfs_qcnt_t)rtblks;
-	return 0;
-}
-
 /*
  * callback routine supplied to bulkstat(). Given an inumber, find its
  * dquots and update them to account for resources taken by that inode.
@@ -1160,7 +1136,8 @@ xfs_qm_dqusage_adjust(
 	int		*res)		/* result code value */
 {
 	xfs_inode_t	*ip;
-	xfs_qcnt_t	nblks, rtblks = 0;
+	xfs_qcnt_t	nblks;
+	xfs_filblks_t	rtblks = 0;	/* total rt blks */
 	int		error;
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1190,12 +1167,15 @@ xfs_qm_dqusage_adjust(
 	ASSERT(ip->i_delayed_blks == 0);
 
 	if (XFS_IS_REALTIME_INODE(ip)) {
-		/*
-		 * Walk thru the extent list and count the realtime blocks.
-		 */
-		error = xfs_qm_get_rtblks(ip, &rtblks);
-		if (error)
-			goto error0;
+		struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+			error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+			if (error)
+				goto error0;
+		}
+
+		xfs_bmap_count_leaves(ifp, &rtblks);
 	}
 
 	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;

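The quotacheck change replaces xfs_qm_get_rtblks() with xfs_bmap_count_leaves(), which just sums blockcount over the in-core extent records. A sketch of that summation, with a plain C array standing in for the in-core extent list:

/*
 * Sketch of counting realtime blocks for quotacheck: sum the block
 * counts of every extent record in the data fork.
 */
#include <stdint.h>
#include <stdio.h>

struct extent { uint64_t startblock; uint64_t blockcount; };

static uint64_t count_leaves(const struct extent *ext, int nextents)
{
	uint64_t blocks = 0;

	for (int i = 0; i < nextents; i++)
		blocks += ext[i].blockcount;
	return blocks;
}

int main(void)
{
	struct extent rt[] = { { 0, 16 }, { 64, 8 }, { 128, 32 } };

	printf("rtblks = %llu\n",
	       (unsigned long long)count_leaves(rt, 3));	/* 56 */
	return 0;
}
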
+ 1 - 1
fs/xfs/xfs_refcount_item.c

@@ -525,7 +525,7 @@ xfs_cui_recover(
 	}
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto abort_defer;
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);

+ 7 - 4
fs/xfs/xfs_reflink.c

@@ -464,7 +464,7 @@ retry:
 		goto out_bmap_cancel;
 
 	/* Finish up. */
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -602,7 +602,8 @@ xfs_reflink_cancel_cow_blocks(
 					-(long)del.br_blockcount);
 
 			/* Roll the transaction */
-			error = xfs_defer_finish(tpp, &dfops, ip);
+			xfs_defer_ijoin(&dfops, ip);
+			error = xfs_defer_finish(tpp, &dfops);
 			if (error) {
 				xfs_defer_cancel(&dfops);
 				break;
@@ -791,7 +792,8 @@ xfs_reflink_end_cow(
 		/* Remove the mapping from the CoW fork. */
 		xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
 
-		error = xfs_defer_finish(&tp, &dfops, ip);
+		xfs_defer_ijoin(&dfops, ip);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
 next_extent:
@@ -1152,7 +1154,8 @@ xfs_reflink_remap_extent(
 
 next_extent:
 		/* Process all the deferred stuff. */
-		error = xfs_defer_finish(&tp, &dfops, ip);
+		xfs_defer_ijoin(&dfops, ip);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
 	}

+ 1 - 1
fs/xfs/xfs_rtalloc.c

@@ -810,7 +810,7 @@ xfs_growfs_rt_alloc(
 		/*
 		 * Free any blocks freed up in the transaction, then commit.
 		 */
-		error = xfs_defer_finish(&tp, &dfops, NULL);
+		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);

+ 1 - 1
fs/xfs/xfs_super.c

@@ -1220,7 +1220,7 @@ xfs_test_remount_options(
 	tmp_mp->m_super = sb;
 	error = xfs_parseargs(tmp_mp, options);
 	xfs_free_fsname(tmp_mp);
-	kfree(tmp_mp);
+	kmem_free(tmp_mp);
 
 	return error;
 }

+ 3 - 2
fs/xfs/xfs_symlink.c

@@ -378,7 +378,7 @@ xfs_symlink(
 		xfs_trans_set_sync(tp);
 	}
 
-	error = xfs_defer_finish(&tp, &dfops, NULL);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -497,7 +497,8 @@ xfs_inactive_symlink_rmt(
 	/*
 	 * Commit the first transaction.  This logs the EFI and the inode.
 	 */
-	error = xfs_defer_finish(&tp, &dfops, ip);
+	xfs_defer_ijoin(&dfops, ip);
+	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto error_bmap_cancel;
 	/*

+ 44 - 4
fs/xfs/xfs_trace.h

@@ -517,7 +517,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
@@ -689,11 +688,34 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
 
-DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
-DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
 DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
 
+TRACE_EVENT(xfs_filemap_fault,
+	TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
+		 bool write_fault),
+	TP_ARGS(ip, pe_size, write_fault),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(enum page_entry_size, pe_size)
+		__field(bool, write_fault)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->pe_size = pe_size;
+		__entry->write_fault = write_fault;
+	),
+	TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_symbolic(__entry->pe_size,
+			{ PE_SIZE_PTE,	"PTE" },
+			{ PE_SIZE_PMD,	"PMD" },
+			{ PE_SIZE_PUD,	"PUD" }),
+		  __entry->write_fault)
+)
+
 DECLARE_EVENT_CLASS(xfs_iref_class,
 	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
 	TP_ARGS(ip, caller_ip),
@@ -1963,6 +1985,24 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
 
+TRACE_EVENT(xfs_log_recover,
+	TP_PROTO(struct xlog *log, xfs_daddr_t headblk, xfs_daddr_t tailblk),
+	TP_ARGS(log, headblk, tailblk),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_daddr_t, headblk)
+		__field(xfs_daddr_t, tailblk)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->headblk = headblk;
+		__entry->tailblk = tailblk;
+	),
+	TP_printk("dev %d:%d headblk 0x%llx tailblk 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->headblk,
+		  __entry->tailblk)
+)
+
 TRACE_EVENT(xfs_log_recover_record,
 	TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass),
 	TP_ARGS(log, rhead, pass),

+ 5 - 23
fs/xfs/xfs_trans.c

@@ -1035,25 +1035,18 @@ xfs_trans_cancel(
  */
 int
 xfs_trans_roll(
-	struct xfs_trans	**tpp,
-	struct xfs_inode	*dp)
+	struct xfs_trans	**tpp)
 {
-	struct xfs_trans	*trans;
+	struct xfs_trans	*trans = *tpp;
 	struct xfs_trans_res	tres;
 	int			error;
 
-	/*
-	 * Ensure that the inode is always logged.
-	 */
-	trans = *tpp;
-	if (dp)
-		xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
-
 	/*
 	 * Copy the critical parameters from one trans to the next.
 	 */
 	tres.tr_logres = trans->t_log_res;
 	tres.tr_logcount = trans->t_log_count;
+
 	*tpp = xfs_trans_dup(trans);
 
 	/*
@@ -1067,10 +1060,8 @@ xfs_trans_roll(
 	if (error)
 		return error;
 
-	trans = *tpp;
-
 	/*
-	 * Reserve space in the log for th next transaction.
+	 * Reserve space in the log for the next transaction.
 	 * This also pushes items in the "AIL", the list of logged items,
 	 * out to disk if they are taking up space at the tail of the log
 	 * that we want to use.  This requires that either nothing be locked
@@ -1078,14 +1069,5 @@ xfs_trans_roll(
 	 * the prior and the next transactions.
 	 */
 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
-	error = xfs_trans_reserve(trans, &tres, 0, 0);
-	/*
-	 *  Ensure that the inode is in the new transaction and locked.
-	 */
-	if (error)
-		return error;
-
-	if (dp)
-		xfs_trans_ijoin(trans, dp, 0);
-	return 0;
+	return xfs_trans_reserve(*tpp, &tres, 0, 0);
 }

+ 12 - 5
fs/xfs/xfs_trans.h

@@ -49,6 +49,7 @@ typedef struct xfs_log_item {
 	struct xfs_ail			*li_ailp;	/* ptr to AIL */
 	uint				li_type;	/* item type */
 	uint				li_flags;	/* misc flags */
+	struct xfs_buf			*li_buf;	/* real buffer pointer */
 	struct xfs_log_item		*li_bio_list;	/* buffer item list */
 	void				(*li_cb)(struct xfs_buf *,
 						 struct xfs_log_item *);
@@ -64,11 +65,13 @@ typedef struct xfs_log_item {
 } xfs_log_item_t;
 
 #define	XFS_LI_IN_AIL	0x1
-#define XFS_LI_ABORTED	0x2
+#define	XFS_LI_ABORTED	0x2
+#define	XFS_LI_FAILED	0x4
 
 #define XFS_LI_FLAGS \
 	{ XFS_LI_IN_AIL,	"IN_AIL" }, \
-	{ XFS_LI_ABORTED,	"ABORTED" }
+	{ XFS_LI_ABORTED,	"ABORTED" }, \
+	{ XFS_LI_FAILED,	"FAILED" }
 
 struct xfs_item_ops {
 	void (*iop_size)(xfs_log_item_t *, int *, int *);
@@ -79,6 +82,7 @@ struct xfs_item_ops {
 	void (*iop_unlock)(xfs_log_item_t *);
 	xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
 	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
+	void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
 };
 
 void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
@@ -208,12 +212,14 @@ void		xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
-void		xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
+bool		xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
 void		xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
-void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
+void		xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
+				  uint);
+void		xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
 void		xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 
 void		xfs_extent_free_init_defer_op(void);
@@ -224,7 +230,8 @@ int		xfs_trans_free_extent(struct xfs_trans *,
 				      struct xfs_efd_log_item *, xfs_fsblock_t,
 				      xfs_extlen_t, struct xfs_owner_info *);
 int		xfs_trans_commit(struct xfs_trans *);
-int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
+int		xfs_trans_roll(struct xfs_trans **);
+int		xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
 void		xfs_trans_cancel(xfs_trans_t *);
 int		xfs_trans_ail_init(struct xfs_mount *);
 void		xfs_trans_ail_destroy(struct xfs_mount *);

+ 18 - 2
fs/xfs/xfs_trans_ail.c

@@ -325,6 +325,21 @@ xfs_ail_delete(
 	xfs_trans_ail_cursor_clear(ailp, lip);
 }
 
+static inline uint
+xfsaild_push_item(
+	struct xfs_ail		*ailp,
+	struct xfs_log_item	*lip)
+{
+	/*
+	 * If log item pinning is enabled, skip the push and track the item as
+	 * pinned. This can help induce head-behind-tail conditions.
+	 */
+	if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN))
+		return XFS_ITEM_PINNED;
+
+	return lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
+}
+
 static long
 xfsaild_push(
 	struct xfs_ail		*ailp)
@@ -382,7 +397,7 @@ xfsaild_push(
 		 * rely on the AIL cursor implementation to be able to deal with
 		 * the dropped lock.
 		 */
-		lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
+		lock_result = xfsaild_push_item(ailp, lip);
 		switch (lock_result) {
 		case XFS_ITEM_SUCCESS:
 			XFS_STATS_INC(mp, xs_push_ail_success);
@@ -687,12 +702,13 @@ xfs_trans_ail_update_bulk(
 bool
 xfs_ail_delete_one(
 	struct xfs_ail		*ailp,
-	struct xfs_log_item 	*lip)
+	struct xfs_log_item	*lip)
 {
 	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
 
 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);
+	xfs_clear_li_failed(lip);
 	lip->li_flags &= ~XFS_LI_IN_AIL;
 	lip->li_lsn = 0;
 

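xfsaild_push_item() above gives tests a hook to make any AIL item report as pinned, which starves the tail and helps induce the head-behind-tail log states the recovery fixes target. A rough userspace analogue of such an injection wrapper; the environment-variable knob and return codes are invented for the example:

/*
 * Error-injection wrapper sketch: consult a test knob and report the
 * item as pinned instead of pushing it.
 */
#include <stdio.h>
#include <stdlib.h>

enum { ITEM_SUCCESS, ITEM_PINNED };

static int real_push(void)
{
	puts("pushed item to buffer list");
	return ITEM_SUCCESS;
}

static int push_item(void)
{
	/* stand-in for XFS_TEST_ERROR(..., XFS_ERRTAG_LOG_ITEM_PIN) */
	if (getenv("FAKE_LOG_ITEM_PIN"))
		return ITEM_PINNED;
	return real_push();
}

int main(void)
{
	printf("result = %d\n", push_item());
	return 0;
}
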
+ 49 - 30
fs/xfs/xfs_trans_buf.c

@@ -435,7 +435,7 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 	if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
 		xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
 		xfs_buf_item_relse(bp);
-	} else if (!xfs_buf_item_dirty(bip)) {
+	} else if (!(bip->bli_flags & XFS_BLI_DIRTY)) {
 /***
 		ASSERT(bp->b_pincount == 0);
 ***/
@@ -493,25 +493,17 @@ xfs_trans_bhold_release(xfs_trans_t	*tp,
 }
 
 /*
- * This is called to mark bytes first through last inclusive of the given
- * buffer as needing to be logged when the transaction is committed.
- * The buffer must already be associated with the given transaction.
- *
- * First and last are numbers relative to the beginning of this buffer,
- * so the first byte in the buffer is numbered 0 regardless of the
- * value of b_blkno.
+ * Mark a buffer dirty in the transaction.
  */
 void
-xfs_trans_log_buf(xfs_trans_t	*tp,
-		  xfs_buf_t	*bp,
-		  uint		first,
-		  uint		last)
+xfs_trans_dirty_buf(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp)
 {
-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
 
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
-	ASSERT(first <= last && last < BBTOB(bp->b_length));
 	ASSERT(bp->b_iodone == NULL ||
 	       bp->b_iodone == xfs_buf_iodone_callbacks);
 
@@ -531,8 +523,6 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 	bp->b_iodone = xfs_buf_iodone_callbacks;
 	bip->bli_item.li_cb = xfs_buf_iodone;
 
-	trace_xfs_trans_log_buf(bip);
-
 	/*
 	 * If we invalidated the buffer within this transaction, then
 	 * cancel the invalidation now that we're dirtying the buffer
@@ -545,17 +535,37 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 		bp->b_flags &= ~XBF_STALE;
 		bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
 	}
+	bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
 	bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
 
-	/*
-	 * If we have an ordered buffer we are not logging any dirty range but
-	 * it still needs to be marked dirty and that it has been logged.
-	 */
-	bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
-	if (!(bip->bli_flags & XFS_BLI_ORDERED))
-		xfs_buf_item_log(bip, first, last);
+/*
+ * This is called to mark bytes first through last inclusive of the given
+ * buffer as needing to be logged when the transaction is committed.
+ * The buffer must already be associated with the given transaction.
+ *
+ * First and last are numbers relative to the beginning of this buffer,
+ * so the first byte in the buffer is numbered 0 regardless of the
+ * value of b_blkno.
+ */
+void
+xfs_trans_log_buf(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
+	uint			first,
+	uint			last)
+{
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
+
+	ASSERT(first <= last && last < BBTOB(bp->b_length));
+	ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
+
+	xfs_trans_dirty_buf(tp, bp);
+
+	trace_xfs_trans_log_buf(bip);
+	xfs_buf_item_log(bip, first, last);
 }
 
 
@@ -708,14 +718,13 @@ xfs_trans_inode_alloc_buf(
 }
 
 /*
- * Mark the buffer as ordered for this transaction. This means
- * that the contents of the buffer are not recorded in the transaction
- * but it is tracked in the AIL as though it was. This allows us
- * to record logical changes in transactions rather than the physical
- * changes we make to the buffer without changing writeback ordering
- * constraints of metadata buffers.
+ * Mark the buffer as ordered for this transaction. This means that the contents
+ * of the buffer are not recorded in the transaction but it is tracked in the
+ * AIL as though it was. This allows us to record logical changes in
+ * transactions rather than the physical changes we make to the buffer without
+ * changing writeback ordering constraints of metadata buffers.
  */
-void
+bool
 xfs_trans_ordered_buf(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*bp)
@@ -726,8 +735,18 @@ xfs_trans_ordered_buf(
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
+	if (xfs_buf_item_dirty_format(bip))
+		return false;
+
 	bip->bli_flags |= XFS_BLI_ORDERED;
 	trace_xfs_buf_item_ordered(bip);
+
+	/*
+	 * We don't log a dirty range of an ordered buffer, but it still needs
+	 * to be marked dirty and recorded as logged.
+	 */
+	xfs_trans_dirty_buf(tp, bp);
+	return true;
 }
 
 /*

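Because xfs_trans_ordered_buf() can now refuse (returning false when the buffer already has formatted dirty ranges), callers need a fallback; the swapext owner-change path relogs the whole buffer in that case. A speculative sketch of that caller-side handling, with stub types in place of the real transaction code:

/*
 * Sketch of handling the new bool return: if the buffer cannot be
 * demoted to an ordered buffer, log it in full instead.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_buf { bool has_logged_ranges; };

static bool order_buf(struct fake_buf *bp)
{
	if (bp->has_logged_ranges)
		return false;	/* dirty format regions: can't order */
	puts("buffer marked ordered (tracked in AIL, contents not logged)");
	return true;
}

static void log_whole_buf(struct fake_buf *bp)
{
	(void)bp;
	puts("fell back to logging the entire buffer");
}

int main(void)
{
	struct fake_buf clean = { false }, dirty = { true };

	if (!order_buf(&clean))
		log_whole_buf(&clean);
	if (!order_buf(&dirty))
		log_whole_buf(&dirty);
	return 0;
}
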
+ 14 - 0
fs/xfs/xfs_trans_inode.c

@@ -134,3 +134,17 @@ xfs_trans_log_inode(
 	flags |= ip->i_itemp->ili_last_fields;
 	ip->i_itemp->ili_fields |= flags;
 }
+
+int
+xfs_trans_roll_inode(
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*ip)
+{
+	int			error;
+
+	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+	error = xfs_trans_roll(tpp);
+	if (!error)
+		xfs_trans_ijoin(*tpp, ip, 0);
+	return error;
+}

+ 31 - 0
fs/xfs/xfs_trans_priv.h

@@ -164,4 +164,35 @@ xfs_trans_ail_copy_lsn(
 	*dst = *src;
 }
 #endif
+
+static inline void
+xfs_clear_li_failed(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_buf	*bp = lip->li_buf;
+
+	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+	lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+	if (lip->li_flags & XFS_LI_FAILED) {
+		lip->li_flags &= ~XFS_LI_FAILED;
+		lip->li_buf = NULL;
+		xfs_buf_rele(bp);
+	}
+}
+
+static inline void
+xfs_set_li_failed(
+	struct xfs_log_item	*lip,
+	struct xfs_buf		*bp)
+{
+	lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+	if (!(lip->li_flags & XFS_LI_FAILED)) {
+		xfs_buf_hold(bp);
+		lip->li_flags |= XFS_LI_FAILED;
+		lip->li_buf = bp;
+	}
+}
+
 #endif	/* __XFS_TRANS_PRIV_H__ */

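The set/clear helpers above keep a single buffer hold paired with the XFS_LI_FAILED bit: setting the flag takes one hold and stashes li_buf, clearing drops both, and repeated calls are no-ops, so the reference count stays balanced. A compact model of that invariant, with a plain integer in place of b_hold:

/*
 * Model of the XFS_LI_FAILED/hold pairing: set takes exactly one
 * reference and stashes the buffer pointer, clear releases both,
 * and both operations are idempotent.
 */
#include <assert.h>
#include <stddef.h>

#define LI_FAILED 0x4

struct buf { int hold; };
struct item { unsigned int flags; struct buf *buf; };

static void set_failed(struct item *lip, struct buf *bp)
{
	if (!(lip->flags & LI_FAILED)) {
		bp->hold++;		/* xfs_buf_hold() */
		lip->flags |= LI_FAILED;
		lip->buf = bp;
	}
}

static void clear_failed(struct item *lip)
{
	if (lip->flags & LI_FAILED) {
		lip->flags &= ~LI_FAILED;
		lip->buf->hold--;	/* xfs_buf_rele() */
		lip->buf = NULL;
	}
}

int main(void)
{
	struct buf bp = { .hold = 1 };
	struct item li = { 0, NULL };

	set_failed(&li, &bp);
	set_failed(&li, &bp);	/* idempotent: no extra hold */
	assert(bp.hold == 2);
	clear_failed(&li);
	clear_failed(&li);	/* idempotent: no double release */
	assert(bp.hold == 1);
	return 0;
}
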
+ 1 - 0
include/linux/fs.h

@@ -2837,6 +2837,7 @@ static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
 extern void unlock_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
+extern void evict_inodes(struct super_block *sb);
 
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);

+ 1 - 1
include/uapi/linux/fsmap.h

@@ -96,7 +96,7 @@ fsmap_advance(
 #define FMR_OF_EXTENT_MAP	0x4	/* segment = extent map */
 #define FMR_OF_SHARED		0x8	/* segment = shared with another file */
 #define FMR_OF_SPECIAL_OWNER	0x10	/* owner is a special value */
-#define FMR_OF_LAST		0x20	/* segment is the last in the FS */
+#define FMR_OF_LAST		0x20	/* segment is the last in the dataset */
 
 /* Each FS gets to define its own special owner codes. */
 #define FMR_OWNER(type, code)	(((__u64)type << 32) | \