преди 12 години · 300893b08f
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -620,12 +620,16 @@ spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
 
				 		case Opt_uid:
			
 
				 			if (match_int(&args[0], &option))
			
 
				 				return 0;
			
 
				-			root->i_uid = option;
			
 
				+			root->i_uid = make_kuid(current_user_ns(), option);
			
 
				+			if (!uid_valid(root->i_uid))
			
 
				+				return 0;
			
 
				 			break;
			
 
				 		case Opt_gid:
			
 
				 			if (match_int(&args[0], &option))
			
 
				 				return 0;
			
 
				-			root->i_gid = option;
			
 
				+			root->i_gid = make_kgid(current_user_ns(), option);
			
 
				+			if (!gid_valid(root->i_gid))
			
 
				+				return 0;
			
 
				 			break;
			
 
				 		case Opt_mode:
			
 
				 			if (match_octal(&args[0], &option))
			
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -27,6 +27,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 
				 	case Q_SYNC:
			
 
				 	case Q_GETINFO:
			
 
				 	case Q_XGETQSTAT:
			
 
				+	case Q_XGETQSTATV:
			
 
				 	case Q_XQUOTASYNC:
			
 
				 		break;
			
 
				 	/* allow to query information for dquots we "own" */
			
@@ -217,6 +218,31 @@ static int quota_getxstate(struct super_block *sb, void __user *addr)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int quota_getxstatev(struct super_block *sb, void __user *addr)
			
 
				+{
			
 
				+	struct fs_quota_statv fqs;	/* in: qs_version; out: whole struct */
			
 
				+	int ret;
			
 
				+
			
 
				+	if (!sb->s_qcop->get_xstatev)
			
 
				+		return -ENOSYS;	/* no get_xstatev op provided */
			
 
				+
			
 
				+	memset(&fqs, 0, sizeof(fqs));	/* start from an all-zero struct */
			
 
				+	if (copy_from_user(&fqs, addr, 1)) /* Copy 1 byte: just qs_version */
			
 
				+		return -EFAULT;
			
 
				+
			
 
				+	/* Only versions this kernel knows are accepted; others get -EINVAL */
			
 
				+	switch (fqs.qs_version) {
			
 
				+	case FS_QSTATV_VERSION1:
			
 
				+		break;
			
 
				+	default:
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+	ret = sb->s_qcop->get_xstatev(sb, &fqs);
			
 
				+	if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
			
 
				+		return -EFAULT;	/* copy-out attempted only when ret == 0 */
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 static int quota_setxquota(struct super_block *sb, int type, qid_t id,
			
 
				 			   void __user *addr)
			
 
				 {
			
@@ -293,6 +319,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 
				 		return quota_setxstate(sb, cmd, addr);
			
 
				 	case Q_XGETQSTAT:
			
 
				 		return quota_getxstate(sb, addr);
			
 
				+	case Q_XGETQSTATV:
			
 
				+		return quota_getxstatev(sb, addr);
			
 
				 	case Q_XSETQLIM:
			
 
				 		return quota_setxquota(sb, type, id, addr);
			
 
				 	case Q_XGETQUOTA:
			
@@ -317,6 +345,7 @@ static int quotactl_cmd_write(int cmd)
 
				 	case Q_GETINFO:
			
 
				 	case Q_SYNC:
			
 
				 	case Q_XGETQSTAT:
			
 
				+	case Q_XGETQSTATV:
			
 
				 	case Q_XGETQUOTA:
			
 
				 	case Q_XQUOTASYNC:
			
 
				 		return 0;
			
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -27,9 +27,12 @@ xfs-y				+= xfs_trace.o
 
				 
			
 
				 # highlevel code
			
 
				 xfs-y				+= xfs_aops.o \
			
 
				+				   xfs_attr_inactive.o \
			
 
				+				   xfs_attr_list.o \
			
 
				 				   xfs_bit.o \
			
 
				+				   xfs_bmap_util.o \
			
 
				 				   xfs_buf.o \
			
 
				-				   xfs_dfrag.o \
			
 
				+				   xfs_dir2_readdir.o \
			
 
				 				   xfs_discard.o \
			
 
				 				   xfs_error.o \
			
 
				 				   xfs_export.o \
			
@@ -44,11 +47,11 @@ xfs-y				+= xfs_aops.o \
 
				 				   xfs_iops.o \
			
 
				 				   xfs_itable.o \
			
 
				 				   xfs_message.o \
			
 
				+				   xfs_mount.o \
			
 
				 				   xfs_mru_cache.o \
			
 
				-				   xfs_rename.o \
			
 
				 				   xfs_super.o \
			
 
				-				   xfs_utils.o \
			
 
				-				   xfs_vnodeops.o \
			
 
				+				   xfs_symlink.o \
			
 
				+				   xfs_trans.o \
			
 
				 				   xfs_xattr.o \
			
 
				 				   kmem.o \
			
 
				 				   uuid.o
			
@@ -73,10 +76,13 @@ xfs-y				+= xfs_alloc.o \
 
				 				   xfs_ialloc_btree.o \
			
 
				 				   xfs_icreate_item.o \
			
 
				 				   xfs_inode.o \
			
 
				+				   xfs_inode_fork.o \
			
 
				+				   xfs_inode_buf.o \
			
 
				 				   xfs_log_recover.o \
			
 
				-				   xfs_mount.o \
			
 
				-				   xfs_symlink.o \
			
 
				-				   xfs_trans.o
			
 
				+				   xfs_log_rlimit.o \
			
 
				+				   xfs_sb.o \
			
 
				+				   xfs_symlink_remote.o \
			
 
				+				   xfs_trans_resv.o
			
 
				 
			
 
				 # low-level transaction/log code
			
 
				 xfs-y				+= xfs_log.o \
			
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -16,11 +16,13 @@
 
				  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				  */
			
 
				 #include "xfs.h"
			
 
				+#include "xfs_log_format.h"
			
 
				+#include "xfs_trans_resv.h"
			
 
				 #include "xfs_acl.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_inode.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				+#include "xfs_ag.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_trace.h"
			
@@ -68,14 +70,15 @@ xfs_acl_from_disk(
 
				 
			
 
				 		switch (acl_e->e_tag) {
			
 
				 		case ACL_USER:
			
 
				+			acl_e->e_uid = xfs_uid_to_kuid(be32_to_cpu(ace->ae_id));
			
 
				+			break;
			
 
				 		case ACL_GROUP:
			
 
				-			acl_e->e_id = be32_to_cpu(ace->ae_id);
			
 
				+			acl_e->e_gid = xfs_gid_to_kgid(be32_to_cpu(ace->ae_id));
			
 
				 			break;
			
 
				 		case ACL_USER_OBJ:
			
 
				 		case ACL_GROUP_OBJ:
			
 
				 		case ACL_MASK:
			
 
				 		case ACL_OTHER:
			
 
				-			acl_e->e_id = ACL_UNDEFINED_ID;
			
 
				 			break;
			
 
				 		default:
			
 
				 			goto fail;
			
@@ -101,7 +104,18 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
 
				 		acl_e = &acl->a_entries[i];
			
 
				 
			
 
				 		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
			
 
				-		ace->ae_id = cpu_to_be32(acl_e->e_id);
			
 
				+		switch (acl_e->e_tag) {
			
 
				+		case ACL_USER:
			
 
				+			ace->ae_id = cpu_to_be32(xfs_kuid_to_uid(acl_e->e_uid));
			
 
				+			break;
			
 
				+		case ACL_GROUP:
			
 
				+			ace->ae_id = cpu_to_be32(xfs_kgid_to_gid(acl_e->e_gid));
			
 
				+			break;
			
 
				+		default:
			
 
				+			ace->ae_id = cpu_to_be32(ACL_UNDEFINED_ID);
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				 		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
			
 
				 	}
			
 
				 }
			
@@ -360,7 +374,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
				 		return -EINVAL;
			
 
				 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
			
 
				 		return value ? -EACCES : 0;
			
 
				-	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
			
 
				+	if (!inode_owner_or_capable(inode))
			
 
				 		return -EPERM;
			
 
				 
			
 
				 	if (!value)
			
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -226,59 +226,6 @@ typedef struct xfs_agfl {
 
				 	__be32		agfl_bno[];	/* actually XFS_AGFL_SIZE(mp) */
			
 
				 } xfs_agfl_t;
			
 
				 
			
 
				-/*
			
 
				- * Per-ag incore structure, copies of information in agf and agi,
			
 
				- * to improve the performance of allocation group selection.
			
 
				- */
			
 
				-#define XFS_PAGB_NUM_SLOTS	128
			
 
				-
			
 
				-typedef struct xfs_perag {
			
 
				-	struct xfs_mount *pag_mount;	/* owner filesystem */
			
 
				-	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
			
 
				-	atomic_t	pag_ref;	/* perag reference count */
			
 
				-	char		pagf_init;	/* this agf's entry is initialized */
			
 
				-	char		pagi_init;	/* this agi's entry is initialized */
			
 
				-	char		pagf_metadata;	/* the agf is preferred to be metadata */
			
 
				-	char		pagi_inodeok;	/* The agi is ok for inodes */
			
 
				-	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
			
 
				-					/* # of levels in bno & cnt btree */
			
 
				-	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
			
 
				-	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
			
 
				-	xfs_extlen_t	pagf_longest;	/* longest free space */
			
 
				-	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
			
 
				-	xfs_agino_t	pagi_freecount;	/* number of free inodes */
			
 
				-	xfs_agino_t	pagi_count;	/* number of allocated inodes */
			
 
				-
			
 
				-	/*
			
 
				-	 * Inode allocation search lookup optimisation.
			
 
				-	 * If the pagino matches, the search for new inodes
			
 
				-	 * doesn't need to search the near ones again straight away
			
 
				-	 */
			
 
				-	xfs_agino_t	pagl_pagino;
			
 
				-	xfs_agino_t	pagl_leftrec;
			
 
				-	xfs_agino_t	pagl_rightrec;
			
 
				-#ifdef __KERNEL__
			
 
				-	spinlock_t	pagb_lock;	/* lock for pagb_tree */
			
 
				-	struct rb_root	pagb_tree;	/* ordered tree of busy extents */
			
 
				-
			
 
				-	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
			
 
				-
			
 
				-	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
			
 
				-	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
			
 
				-	int		pag_ici_reclaimable;	/* reclaimable inodes */
			
 
				-	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
			
 
				-	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
			
 
				-
			
 
				-	/* buffer cache index */
			
 
				-	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */
			
 
				-	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */
			
 
				-
			
 
				-	/* for rcu-safe freeing */
			
 
				-	struct rcu_head	rcu_head;
			
 
				-#endif
			
 
				-	int		pagb_count;	/* pagb slots in use */
			
 
				-} xfs_perag_t;
			
 
				-
			
 
				 /*
			
 
				  * tags for inode radix tree
			
 
				  */
			
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -878,7 +878,7 @@ xfs_alloc_ag_vextent_near(
 
				 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
			
 
				 	xfs_extlen_t	rlen;		/* length of returned extent */
			
 
				 	int		forced = 0;
			
 
				-#if defined(DEBUG) && defined(__KERNEL__)
			
 
				+#ifdef DEBUG
			
 
				 	/*
			
 
				 	 * Randomly don't execute the first algorithm.
			
 
				 	 */
			
@@ -938,8 +938,8 @@ restart:
 
				 		xfs_extlen_t	blen=0;
			
 
				 		xfs_agblock_t	bnew=0;
			
 
				 
			
 
				-#if defined(DEBUG) && defined(__KERNEL__)
			
 
				-		if (!dofirst)
			
 
				+#ifdef DEBUG
			
 
				+		if (dofirst)
			
 
				 			break;
			
 
				 #endif
			
 
				 		/*
			
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -28,9 +28,9 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_iomap.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include <linux/aio.h>
			
 
				 #include <linux/gfp.h>
			
 
				 #include <linux/mpage.h>
			
@@ -108,7 +108,7 @@ xfs_setfilesize_trans_alloc(
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
			
 
				 
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
@@ -440,7 +440,7 @@ xfs_start_page_writeback(
 
				 		end_page_writeback(page);
			
 
				 }
			
 
				 
			
 
				-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
			
 
				+static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
			
 
				 {
			
 
				 	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
			
 
				 }
			
@@ -514,7 +514,7 @@ xfs_submit_ioend(
 
				 				goto retry;
			
 
				 			}
			
 
				 
			
 
				-			if (bio_add_buffer(bio, bh) != bh->b_size) {
			
 
				+			if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
			
 
				 				xfs_submit_ioend_bio(wbc, ioend, bio);
			
 
				 				goto retry;
			
 
				 			}
			
@@ -1498,13 +1498,26 @@ xfs_vm_write_failed(
 
				 	loff_t			pos,
			
 
				 	unsigned		len)
			
 
				 {
			
 
				-	loff_t			block_offset = pos & PAGE_MASK;
			
 
				+	loff_t			block_offset;
			
 
				 	loff_t			block_start;
			
 
				 	loff_t			block_end;
			
 
				 	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
			
 
				 	loff_t			to = from + len;
			
 
				 	struct buffer_head	*bh, *head;
			
 
				 
			
 
				+	/*
			
 
				+	 * The request pos offset might be 32 or 64 bit, which is fine on a
			
 
				+	 * 64-bit platform.  However, for a 64-bit pos request on a 32-bit
			
 
				+	 * platform, the high 32 bits will be masked off if we evaluate the
			
 
				+	 * block_offset via (pos & PAGE_MASK) because PAGE_MASK is
			
 
				+	 * 0xfffff000 as an unsigned long, hence the result is incorrect,
			
 
				+	 * which could cause the following ASSERT to fail in most cases.
			
 
				+	 * In order to avoid this, we evaluate the block_offset of the
			
 
				+	 * start of the page by using shifts rather than a mask, which avoids
			
 
				+	 * the mismatch problem.
			
 
				+	 */
			
 
				+	block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
			
 
				+
			
 
				 	ASSERT(block_offset + from == pos);
			
 
				 
			
 
				 	head = page_buffers(page);
			
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -17,10 +17,11 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
@@ -32,13 +33,13 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_attr_leaf.h"
			
 
				 #include "xfs_attr_remote.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_trans_space.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_trace.h"
			
 
				 
			
 
				 /*
			
@@ -62,7 +63,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
 
				 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
			
 
				 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
			
 
				 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
			
 
				-STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
			
 
				 
			
 
				 /*
			
 
				  * Internal routines when attribute list is more than one block.
			
@@ -70,7 +70,6 @@ STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
 
				 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
			
 
				 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
			
 
				 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
			
 
				-STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
			
 
				 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
			
 
				 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
			
 
				 
			
@@ -90,7 +89,7 @@ xfs_attr_name_to_xname(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-STATIC int
			
 
				+int
			
 
				 xfs_inode_hasattr(
			
 
				 	struct xfs_inode	*ip)
			
 
				 {
			
@@ -227,13 +226,14 @@ xfs_attr_set_int(
 
				 	int		valuelen,
			
 
				 	int		flags)
			
 
				 {
			
 
				-	xfs_da_args_t	args;
			
 
				-	xfs_fsblock_t	firstblock;
			
 
				-	xfs_bmap_free_t flist;
			
 
				-	int		error, err2, committed;
			
 
				-	xfs_mount_t	*mp = dp->i_mount;
			
 
				-	int             rsvd = (flags & ATTR_ROOT) != 0;
			
 
				-	int		local;
			
 
				+	xfs_da_args_t		args;
			
 
				+	xfs_fsblock_t		firstblock;
			
 
				+	xfs_bmap_free_t		flist;
			
 
				+	int			error, err2, committed;
			
 
				+	struct xfs_mount	*mp = dp->i_mount;
			
 
				+	struct xfs_trans_res	tres;
			
 
				+	int			rsvd = (flags & ATTR_ROOT) != 0;
			
 
				+	int			local;
			
 
				 
			
 
				 	/*
			
 
				 	 * Attach the dquots to the inode.
			
@@ -293,11 +293,11 @@ xfs_attr_set_int(
 
				 	if (rsvd)
			
 
				 		args.trans->t_flags |= XFS_TRANS_RESERVE;
			
 
				 
			
 
				-	error = xfs_trans_reserve(args.trans, args.total,
			
 
				-				  XFS_ATTRSETM_LOG_RES(mp) +
			
 
				-				  XFS_ATTRSETRT_LOG_RES(mp) * args.total,
			
 
				-				  0, XFS_TRANS_PERM_LOG_RES,
			
 
				-				  XFS_ATTRSET_LOG_COUNT);
			
 
				+	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
			
 
				+			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
			
 
				+	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
			
 
				+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
			
 
				+	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(args.trans, 0);
			
 
				 		return(error);
			
@@ -517,11 +517,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 
				 	if (flags & ATTR_ROOT)
			
 
				 		args.trans->t_flags |= XFS_TRANS_RESERVE;
			
 
				 
			
 
				-	if ((error = xfs_trans_reserve(args.trans,
			
 
				-				      XFS_ATTRRM_SPACE_RES(mp),
			
 
				-				      XFS_ATTRRM_LOG_RES(mp),
			
 
				-				      0, XFS_TRANS_PERM_LOG_RES,
			
 
				-				      XFS_ATTRRM_LOG_COUNT))) {
			
 
				+	error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
			
 
				+				  XFS_ATTRRM_SPACE_RES(mp), 0);
			
 
				+	if (error) {
			
 
				 		xfs_trans_cancel(args.trans, 0);
			
 
				 		return(error);
			
 
				 	}
			
@@ -611,228 +609,6 @@ xfs_attr_remove(
 
				 	return xfs_attr_remove_int(dp, &xname, flags);
			
 
				 }
			
 
				 
			
 
				-int
			
 
				-xfs_attr_list_int(xfs_attr_list_context_t *context)
			
 
				-{
			
 
				-	int error;
			
 
				-	xfs_inode_t *dp = context->dp;
			
 
				-
			
 
				-	XFS_STATS_INC(xs_attr_list);
			
 
				-
			
 
				-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
			
 
				-		return EIO;
			
 
				-
			
 
				-	xfs_ilock(dp, XFS_ILOCK_SHARED);
			
 
				-
			
 
				-	/*
			
 
				-	 * Decide on what work routines to call based on the inode size.
			
 
				-	 */
			
 
				-	if (!xfs_inode_hasattr(dp)) {
			
 
				-		error = 0;
			
 
				-	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
			
 
				-		error = xfs_attr_shortform_list(context);
			
 
				-	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
			
 
				-		error = xfs_attr_leaf_list(context);
			
 
				-	} else {
			
 
				-		error = xfs_attr_node_list(context);
			
 
				-	}
			
 
				-
			
 
				-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
			
 
				-
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-#define	ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
			
 
				-	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
			
 
				-#define	ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
			
 
				-	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
			
 
				-	 & ~(sizeof(u_int32_t)-1))
			
 
				-
			
 
				-/*
			
 
				- * Format an attribute and copy it out to the user's buffer.
			
 
				- * Take care to check values and protect against them changing later,
			
 
				- * we may be reading them directly out of a user buffer.
			
 
				- */
			
 
				-/*ARGSUSED*/
			
 
				-STATIC int
			
 
				-xfs_attr_put_listent(
			
 
				-	xfs_attr_list_context_t *context,
			
 
				-	int		flags,
			
 
				-	unsigned char	*name,
			
 
				-	int		namelen,
			
 
				-	int		valuelen,
			
 
				-	unsigned char	*value)
			
 
				-{
			
 
				-	struct attrlist *alist = (struct attrlist *)context->alist;
			
 
				-	attrlist_ent_t *aep;
			
 
				-	int arraytop;
			
 
				-
			
 
				-	ASSERT(!(context->flags & ATTR_KERNOVAL));
			
 
				-	ASSERT(context->count >= 0);
			
 
				-	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
			
 
				-	ASSERT(context->firstu >= sizeof(*alist));
			
 
				-	ASSERT(context->firstu <= context->bufsize);
			
 
				-
			
 
				-	/*
			
 
				-	 * Only list entries in the right namespace.
			
 
				-	 */
			
 
				-	if (((context->flags & ATTR_SECURE) == 0) !=
			
 
				-	    ((flags & XFS_ATTR_SECURE) == 0))
			
 
				-		return 0;
			
 
				-	if (((context->flags & ATTR_ROOT) == 0) !=
			
 
				-	    ((flags & XFS_ATTR_ROOT) == 0))
			
 
				-		return 0;
			
 
				-
			
 
				-	arraytop = sizeof(*alist) +
			
 
				-			context->count * sizeof(alist->al_offset[0]);
			
 
				-	context->firstu -= ATTR_ENTSIZE(namelen);
			
 
				-	if (context->firstu < arraytop) {
			
 
				-		trace_xfs_attr_list_full(context);
			
 
				-		alist->al_more = 1;
			
 
				-		context->seen_enough = 1;
			
 
				-		return 1;
			
 
				-	}
			
 
				-
			
 
				-	aep = (attrlist_ent_t *)&context->alist[context->firstu];
			
 
				-	aep->a_valuelen = valuelen;
			
 
				-	memcpy(aep->a_name, name, namelen);
			
 
				-	aep->a_name[namelen] = 0;
			
 
				-	alist->al_offset[context->count++] = context->firstu;
			
 
				-	alist->al_count = context->count;
			
 
				-	trace_xfs_attr_list_add(context);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Generate a list of extended attribute names and optionally
			
 
				- * also value lengths.  Positive return value follows the XFS
			
 
				- * convention of being an error, zero or negative return code
			
 
				- * is the length of the buffer returned (negated), indicating
			
 
				- * success.
			
 
				- */
			
 
				-int
			
 
				-xfs_attr_list(
			
 
				-	xfs_inode_t	*dp,
			
 
				-	char		*buffer,
			
 
				-	int		bufsize,
			
 
				-	int		flags,
			
 
				-	attrlist_cursor_kern_t *cursor)
			
 
				-{
			
 
				-	xfs_attr_list_context_t context;
			
 
				-	struct attrlist *alist;
			
 
				-	int error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Validate the cursor.
			
 
				-	 */
			
 
				-	if (cursor->pad1 || cursor->pad2)
			
 
				-		return(XFS_ERROR(EINVAL));
			
 
				-	if ((cursor->initted == 0) &&
			
 
				-	    (cursor->hashval || cursor->blkno || cursor->offset))
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-
			
 
				-	/*
			
 
				-	 * Check for a properly aligned buffer.
			
 
				-	 */
			
 
				-	if (((long)buffer) & (sizeof(int)-1))
			
 
				-		return XFS_ERROR(EFAULT);
			
 
				-	if (flags & ATTR_KERNOVAL)
			
 
				-		bufsize = 0;
			
 
				-
			
 
				-	/*
			
 
				-	 * Initialize the output buffer.
			
 
				-	 */
			
 
				-	memset(&context, 0, sizeof(context));
			
 
				-	context.dp = dp;
			
 
				-	context.cursor = cursor;
			
 
				-	context.resynch = 1;
			
 
				-	context.flags = flags;
			
 
				-	context.alist = buffer;
			
 
				-	context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
			
 
				-	context.firstu = context.bufsize;
			
 
				-	context.put_listent = xfs_attr_put_listent;
			
 
				-
			
 
				-	alist = (struct attrlist *)context.alist;
			
 
				-	alist->al_count = 0;
			
 
				-	alist->al_more = 0;
			
 
				-	alist->al_offset[0] = context.bufsize;
			
 
				-
			
 
				-	error = xfs_attr_list_int(&context);
			
 
				-	ASSERT(error >= 0);
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-int								/* error */
			
 
				-xfs_attr_inactive(xfs_inode_t *dp)
			
 
				-{
			
 
				-	xfs_trans_t *trans;
			
 
				-	xfs_mount_t *mp;
			
 
				-	int error;
			
 
				-
			
 
				-	mp = dp->i_mount;
			
 
				-	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
			
 
				-
			
 
				-	xfs_ilock(dp, XFS_ILOCK_SHARED);
			
 
				-	if (!xfs_inode_hasattr(dp) ||
			
 
				-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
			
 
				-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
			
 
				-		return 0;
			
 
				-	}
			
 
				-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
			
 
				-
			
 
				-	/*
			
 
				-	 * Start our first transaction of the day.
			
 
				-	 *
			
 
				-	 * All future transactions during this code must be "chained" off
			
 
				-	 * this one via the trans_dup() call.  All transactions will contain
			
 
				-	 * the inode, and the inode will always be marked with trans_ihold().
			
 
				-	 * Since the inode will be locked in all transactions, we must log
			
 
				-	 * the inode in every transaction to let it float upward through
			
 
				-	 * the log.
			
 
				-	 */
			
 
				-	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
			
 
				-	if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
			
 
				-				      XFS_TRANS_PERM_LOG_RES,
			
 
				-				      XFS_ATTRINVAL_LOG_COUNT))) {
			
 
				-		xfs_trans_cancel(trans, 0);
			
 
				-		return(error);
			
 
				-	}
			
 
				-	xfs_ilock(dp, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * No need to make quota reservations here. We expect to release some
			
 
				-	 * blocks, not allocate, in the common case.
			
 
				-	 */
			
 
				-	xfs_trans_ijoin(trans, dp, 0);
			
 
				-
			
 
				-	/*
			
 
				-	 * Decide on what work routines to call based on the inode size.
			
 
				-	 */
			
 
				-	if (!xfs_inode_hasattr(dp) ||
			
 
				-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
			
 
				-		error = 0;
			
 
				-		goto out;
			
 
				-	}
			
 
				-	error = xfs_attr3_root_inactive(&trans, dp);
			
 
				-	if (error)
			
 
				-		goto out;
			
 
				-
			
 
				-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
			
 
				-	if (error)
			
 
				-		goto out;
			
 
				-
			
 
				-	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
			
 
				-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	return(error);
			
 
				-
			
 
				-out:
			
 
				-	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
			
 
				-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
			
 
				-	return(error);
			
 
				-}
			
 
				-
			
 
				-
			
 
				 
			
 
				 /*========================================================================
			
 
				  * External routines when attribute list is inside the inode
			
@@ -1166,28 +942,6 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Copy out attribute entries for attr_list(), for leaf attribute lists.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr_leaf_list(xfs_attr_list_context_t *context)
			
 
				-{
			
 
				-	int error;
			
 
				-	struct xfs_buf *bp;
			
 
				-
			
 
				-	trace_xfs_attr_leaf_list(context);
			
 
				-
			
 
				-	context->cursor->blkno = 0;
			
 
				-	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
			
 
				-	if (error)
			
 
				-		return XFS_ERROR(error);
			
 
				-
			
 
				-	error = xfs_attr3_leaf_list_int(bp, context);
			
 
				-	xfs_trans_brelse(NULL, bp);
			
 
				-	return XFS_ERROR(error);
			
 
				-}
			
 
				-
			
 
				-
			
 
				 /*========================================================================
			
 
				  * External routines when attribute list size > XFS_LBSIZE(mp).
			
 
				  *========================================================================*/
			
@@ -1260,6 +1014,7 @@ restart:
 
				 			 * have been a b-tree.
			
 
				 			 */
			
 
				 			xfs_da_state_free(state);
			
 
				+			state = NULL;
			
 
				 			xfs_bmap_init(args->flist, args->firstblock);
			
 
				 			error = xfs_attr3_leaf_to_node(args);
			
 
				 			if (!error) {
			
@@ -1780,143 +1535,3 @@ xfs_attr_node_get(xfs_da_args_t *args)
 
				 	xfs_da_state_free(state);
			
 
				 	return(retval);
			
 
				 }
			
 
				-
			
 
				-STATIC int							/* error */
			
 
				-xfs_attr_node_list(xfs_attr_list_context_t *context)
			
 
				-{
			
 
				-	attrlist_cursor_kern_t *cursor;
			
 
				-	xfs_attr_leafblock_t *leaf;
			
 
				-	xfs_da_intnode_t *node;
			
 
				-	struct xfs_attr3_icleaf_hdr leafhdr;
			
 
				-	struct xfs_da3_icnode_hdr nodehdr;
			
 
				-	struct xfs_da_node_entry *btree;
			
 
				-	int error, i;
			
 
				-	struct xfs_buf *bp;
			
 
				-
			
 
				-	trace_xfs_attr_node_list(context);
			
 
				-
			
 
				-	cursor = context->cursor;
			
 
				-	cursor->initted = 1;
			
 
				-
			
 
				-	/*
			
 
				-	 * Do all sorts of validation on the passed-in cursor structure.
			
 
				-	 * If anything is amiss, ignore the cursor and look up the hashval
			
 
				-	 * starting from the btree root.
			
 
				-	 */
			
 
				-	bp = NULL;
			
 
				-	if (cursor->blkno > 0) {
			
 
				-		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
			
 
				-					      &bp, XFS_ATTR_FORK);
			
 
				-		if ((error != 0) && (error != EFSCORRUPTED))
			
 
				-			return(error);
			
 
				-		if (bp) {
			
 
				-			struct xfs_attr_leaf_entry *entries;
			
 
				-
			
 
				-			node = bp->b_addr;
			
 
				-			switch (be16_to_cpu(node->hdr.info.magic)) {
			
 
				-			case XFS_DA_NODE_MAGIC:
			
 
				-			case XFS_DA3_NODE_MAGIC:
			
 
				-				trace_xfs_attr_list_wrong_blk(context);
			
 
				-				xfs_trans_brelse(NULL, bp);
			
 
				-				bp = NULL;
			
 
				-				break;
			
 
				-			case XFS_ATTR_LEAF_MAGIC:
			
 
				-			case XFS_ATTR3_LEAF_MAGIC:
			
 
				-				leaf = bp->b_addr;
			
 
				-				xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				-				entries = xfs_attr3_leaf_entryp(leaf);
			
 
				-				if (cursor->hashval > be32_to_cpu(
			
 
				-						entries[leafhdr.count - 1].hashval)) {
			
 
				-					trace_xfs_attr_list_wrong_blk(context);
			
 
				-					xfs_trans_brelse(NULL, bp);
			
 
				-					bp = NULL;
			
 
				-				} else if (cursor->hashval <= be32_to_cpu(
			
 
				-						entries[0].hashval)) {
			
 
				-					trace_xfs_attr_list_wrong_blk(context);
			
 
				-					xfs_trans_brelse(NULL, bp);
			
 
				-					bp = NULL;
			
 
				-				}
			
 
				-				break;
			
 
				-			default:
			
 
				-				trace_xfs_attr_list_wrong_blk(context);
			
 
				-				xfs_trans_brelse(NULL, bp);
			
 
				-				bp = NULL;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * We did not find what we expected given the cursor's contents,
			
 
				-	 * so we start from the top and work down based on the hash value.
			
 
				-	 * Note that start of node block is same as start of leaf block.
			
 
				-	 */
			
 
				-	if (bp == NULL) {
			
 
				-		cursor->blkno = 0;
			
 
				-		for (;;) {
			
 
				-			__uint16_t magic;
			
 
				-
			
 
				-			error = xfs_da3_node_read(NULL, context->dp,
			
 
				-						      cursor->blkno, -1, &bp,
			
 
				-						      XFS_ATTR_FORK);
			
 
				-			if (error)
			
 
				-				return(error);
			
 
				-			node = bp->b_addr;
			
 
				-			magic = be16_to_cpu(node->hdr.info.magic);
			
 
				-			if (magic == XFS_ATTR_LEAF_MAGIC ||
			
 
				-			    magic == XFS_ATTR3_LEAF_MAGIC)
			
 
				-				break;
			
 
				-			if (magic != XFS_DA_NODE_MAGIC &&
			
 
				-			    magic != XFS_DA3_NODE_MAGIC) {
			
 
				-				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
			
 
				-						     XFS_ERRLEVEL_LOW,
			
 
				-						     context->dp->i_mount,
			
 
				-						     node);
			
 
				-				xfs_trans_brelse(NULL, bp);
			
 
				-				return XFS_ERROR(EFSCORRUPTED);
			
 
				-			}
			
 
				-
			
 
				-			xfs_da3_node_hdr_from_disk(&nodehdr, node);
			
 
				-			btree = xfs_da3_node_tree_p(node);
			
 
				-			for (i = 0; i < nodehdr.count; btree++, i++) {
			
 
				-				if (cursor->hashval
			
 
				-						<= be32_to_cpu(btree->hashval)) {
			
 
				-					cursor->blkno = be32_to_cpu(btree->before);
			
 
				-					trace_xfs_attr_list_node_descend(context,
			
 
				-									 btree);
			
 
				-					break;
			
 
				-				}
			
 
				-			}
			
 
				-			if (i == nodehdr.count) {
			
 
				-				xfs_trans_brelse(NULL, bp);
			
 
				-				return 0;
			
 
				-			}
			
 
				-			xfs_trans_brelse(NULL, bp);
			
 
				-		}
			
 
				-	}
			
 
				-	ASSERT(bp != NULL);
			
 
				-
			
 
				-	/*
			
 
				-	 * Roll upward through the blocks, processing each leaf block in
			
 
				-	 * order.  As long as there is space in the result buffer, keep
			
 
				-	 * adding the information.
			
 
				-	 */
			
 
				-	for (;;) {
			
 
				-		leaf = bp->b_addr;
			
 
				-		error = xfs_attr3_leaf_list_int(bp, context);
			
 
				-		if (error) {
			
 
				-			xfs_trans_brelse(NULL, bp);
			
 
				-			return error;
			
 
				-		}
			
 
				-		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				-		if (context->seen_enough || leafhdr.forw == 0)
			
 
				-			break;
			
 
				-		cursor->blkno = leafhdr.forw;
			
 
				-		xfs_trans_brelse(NULL, bp);
			
 
				-		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
			
 
				-					   &bp);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-	}
			
 
				-	xfs_trans_brelse(NULL, bp);
			
 
				-	return 0;
			
 
				-}
			
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -141,5 +141,14 @@ typedef struct xfs_attr_list_context {
 
				  */
			
 
				 int xfs_attr_inactive(struct xfs_inode *dp);
			
 
				 int xfs_attr_list_int(struct xfs_attr_list_context *);
			
 
				+int xfs_inode_hasattr(struct xfs_inode *ip);
			
 
				+int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
			
 
				+		 unsigned char *value, int *valuelenp, int flags);
			
 
				+int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
			
 
				+		 unsigned char *value, int valuelen, int flags);
			
 
				+int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
			
 
				+int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
			
 
				+		  int flags, struct attrlist_cursor_kern *cursor);
			
 
				+
			
 
				 
			
 
				 #endif	/* __XFS_ATTR_H__ */
			
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -0,0 +1,453 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_alloc_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_btree.h"
			
 
				+#include "xfs_attr_remote.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_inode_item.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_attr.h"
			
 
				+#include "xfs_attr_leaf.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_quota.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				+
			
 
				+/*
				+ * Invalidate the buffers backing one remote attribute value.
				+ *
				+ * Walks the attribute-fork mappings covering the @blkcnt blocks that start
				+ * at @blkno.  Every mapped extent is grabbed as a buffer in the current
				+ * transaction, marked invalid with xfs_trans_binval(), and the transaction
				+ * is then rolled.  Holes are skipped: nothing is mapped there, so there is
				+ * nothing to invalidate.
				+ *
				+ * @trans is passed by reference because it is handed to xfs_trans_roll()
				+ * after each invalidation.
				+ *
				+ * Returns 0 on success, ENOMEM if a buffer could not be obtained, or the
				+ * error reported by xfs_bmapi_read() or xfs_trans_roll().
				+ */
				+STATIC int
				+xfs_attr3_leaf_freextent(
				+	struct xfs_trans	**trans,
				+	struct xfs_inode	*dp,
				+	xfs_dablk_t		blkno,
				+	int			blkcnt)
				+{
				+	struct xfs_bmbt_irec	irec;
				+	struct xfs_buf		*bp;
				+	xfs_dablk_t		cur_blk = blkno;
				+	int			remaining = blkcnt;
				+	int			nirecs;
				+	int			error;
				+
				+	/*
				+	 * Each pass consumes exactly one mapping (real extent or hole) and
				+	 * then steps past it; irec is always filled in before the loop
				+	 * increment runs.
				+	 */
				+	for (; remaining > 0;
				+	     cur_blk += irec.br_blockcount,
				+	     remaining -= irec.br_blockcount) {
				+		xfs_daddr_t	daddr;
				+		int		nbblks;
				+
				+		/* Ask for a single mapping starting at the current block. */
				+		nirecs = 1;
				+		error = xfs_bmapi_read(dp, (xfs_fileoff_t)cur_blk, remaining,
				+				       &irec, &nirecs, XFS_BMAPI_ATTRFORK);
				+		if (error)
				+			return error;
				+		ASSERT(nirecs == 1);
				+		ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
				+
				+		/* A hole has no blocks behind it, hence no buffer to kill. */
				+		if (irec.br_startblock == HOLESTARTBLOCK)
				+			continue;
				+
				+		daddr = XFS_FSB_TO_DADDR(dp->i_mount, irec.br_startblock);
				+		nbblks = XFS_FSB_TO_BB(dp->i_mount, irec.br_blockcount);
				+		bp = xfs_trans_get_buf(*trans, dp->i_mount->m_ddev_targp,
				+				       daddr, nbblks, 0);
				+		if (!bp)
				+			return ENOMEM;
				+		xfs_trans_binval(*trans, bp);
				+
				+		/* Commit this invalidation and continue in a fresh transaction. */
				+		error = xfs_trans_roll(trans, dp);
				+		if (error)
				+			return error;
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Invalidate every "remote" value region referenced by one attr leaf block.
				+ *
				+ * The leaf is scanned twice: once to count the entries that are not flagged
				+ * XFS_ATTR_LOCAL and carry a non-zero valueblk, and once to copy their
				+ * (start block, block count) pairs into a temporary array.  The leaf buffer
				+ * @bp is then released *before* any extent is invalidated, so that we are
				+ * not holding it locked while xfs_attr3_leaf_freextent() rolls transactions.
				+ *
				+ * @bp is always released by this function.  Returns 0 on success, otherwise
				+ * the first non-zero result from xfs_attr3_leaf_freextent(); later extents
				+ * are still attempted after a failure.
				+ */
				+STATIC int
				+xfs_attr3_leaf_inactive(
				+	struct xfs_trans	**trans,
				+	struct xfs_inode	*dp,
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_attr_leafblock		*leaf = bp->b_addr;
				+	struct xfs_attr_leaf_entry		*entries;
				+	struct xfs_attr_leaf_name_remote	*rmt;
				+	struct xfs_attr_inactive_list		*extents;
				+	struct xfs_attr3_icleaf_hdr		hdr;
				+	int					nremote = 0;
				+	int					first_error = 0;
				+	int					nbytes;
				+	int					filled;
				+	int					rc;
				+	int					i;
				+
				+	xfs_attr3_leaf_hdr_from_disk(&hdr, leaf);
				+	entries = xfs_attr3_leaf_entryp(leaf);
				+
				+	/* Pass 1: how many entries point at a remote value extent? */
				+	for (i = 0; i < hdr.count; i++) {
				+		if (!be16_to_cpu(entries[i].nameidx))
				+			continue;
				+		if (entries[i].flags & XFS_ATTR_LOCAL)
				+			continue;
				+		rmt = xfs_attr3_leaf_name_remote(leaf, i);
				+		if (rmt->valueblk)
				+			nremote++;
				+	}
				+
				+	/* Nothing remote in this leaf: just drop the buffer. */
				+	if (nremote == 0) {
				+		xfs_trans_brelse(*trans, bp);
				+		return 0;
				+	}
				+
				+	/* Scratch array with one slot per remote value extent. */
				+	nbytes = nremote * sizeof(xfs_attr_inactive_list_t);
				+	extents = kmem_alloc(nbytes, KM_SLEEP);
				+
				+	/* Pass 2: record where each remote value lives and how long it is. */
				+	filled = 0;
				+	for (i = 0; i < hdr.count; i++) {
				+		if (!be16_to_cpu(entries[i].nameidx))
				+			continue;
				+		if (entries[i].flags & XFS_ATTR_LOCAL)
				+			continue;
				+		rmt = xfs_attr3_leaf_name_remote(leaf, i);
				+		if (!rmt->valueblk)
				+			continue;
				+		extents[filled].valueblk = be32_to_cpu(rmt->valueblk);
				+		extents[filled].valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
				+						be32_to_cpu(rmt->valuelen));
				+		filled++;
				+	}
				+
				+	/* Unlock the leaf before freextent() starts rolling transactions. */
				+	xfs_trans_brelse(*trans, bp);
				+
				+	/* Invalidate each recorded extent, remembering only the 1st errno. */
				+	for (i = 0; i < nremote; i++) {
				+		rc = xfs_attr3_leaf_freextent(trans, dp,
				+				extents[i].valueblk, extents[i].valuelen);
				+		if (first_error == 0)
				+			first_error = rc;
				+	}
				+
				+	kmem_free(extents);
				+	return first_error;
				+}
			
 
				+
			
 
				+/*
				+ * Recurse (gasp!) through the attribute nodes until we find leaves.
				+ * We're doing a depth-first traversal in order to invalidate everything.
				+ *
				+ * @bp is the node block to process and is always released here.  For each
				+ * entry in the node the child block is read, handed to this function again
				+ * (node magic) or to xfs_attr3_leaf_inactive() (leaf magic), and then the
				+ * child block itself is invalidated.  The transaction is rolled once per
				+ * entry, which is why @trans is passed by reference.
				+ *
				+ * @level is the current recursion depth; exceeding XFS_DA_NODE_MAXDEPTH
				+ * yields EIO.  Returns 0 on success, EIO for an unrecognised child magic
				+ * number, or the first error returned by a callee.
				+ */
				+STATIC int
				+xfs_attr3_node_inactive(
				+	struct xfs_trans **trans,
				+	struct xfs_inode *dp,
				+	struct xfs_buf	*bp,
				+	int		level)
				+{
				+	xfs_da_blkinfo_t *info;
				+	xfs_da_intnode_t *node;
				+	xfs_dablk_t child_fsb;
				+	xfs_daddr_t parent_blkno, child_blkno;
				+	int error, i;
				+	struct xfs_buf *child_bp;
				+	struct xfs_da_node_entry *btree;
				+	struct xfs_da3_icnode_hdr ichdr;
				+
				+	/*
				+	 * Since this code is recursive (gasp!) we must protect ourselves.
				+	 */
				+	if (level > XFS_DA_NODE_MAXDEPTH) {
				+		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
				+		return XFS_ERROR(EIO);
				+	}
				+
				+	node = bp->b_addr;
				+	xfs_da3_node_hdr_from_disk(&ichdr, node);
				+	parent_blkno = bp->b_bn;	/* remembered so the parent can be re-read */
				+	if (!ichdr.count) {
				+		xfs_trans_brelse(*trans, bp);
				+		return 0;
				+	}
				+	btree = xfs_da3_node_tree_p(node);
				+	child_fsb = be32_to_cpu(btree[0].before);
				+	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
				+
				+	/*
				+	 * If this is the node level just above the leaves, simply loop
				+	 * over the leaves removing all of them.  If this is higher up
				+	 * in the tree, recurse downward.
				+	 */
				+	for (i = 0; i < ichdr.count; i++) {
				+		/*
				+		 * Read the subsidiary block to see what we have to work with.
				+		 * This is a depth-first traversal of the tree so we may deal
				+		 * with many blocks before we come back to this one.
				+		 *
				+		 * NOTE(review): the -2 argument presumably allows the read to
				+		 * succeed with a NULL child_bp (hence the check below) --
				+		 * confirm against xfs_da3_node_read().
				+		 */
				+		error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
				+						XFS_ATTR_FORK);
				+		if (error)
				+			return(error);
				+		if (child_bp) {
				+						/* save for re-read later */
				+			child_blkno = XFS_BUF_ADDR(child_bp);
				+
				+			/*
				+			 * Invalidate the subtree, however we have to.
				+			 * Both callees release child_bp themselves.
				+			 */
				+			info = child_bp->b_addr;
				+			switch (info->magic) {
				+			case cpu_to_be16(XFS_DA_NODE_MAGIC):
				+			case cpu_to_be16(XFS_DA3_NODE_MAGIC):
				+				error = xfs_attr3_node_inactive(trans, dp,
				+							child_bp, level + 1);
				+				break;
				+			case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
				+			case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
				+				error = xfs_attr3_leaf_inactive(trans, dp,
				+							child_bp);
				+				break;
				+			default:
				+				error = XFS_ERROR(EIO);
				+				xfs_trans_brelse(*trans, child_bp);
				+				break;
				+			}
				+			if (error)
				+				return error;
				+
				+			/*
				+			 * Remove the subsidiary block from the cache
				+			 * and from the log.
				+			 */
				+			error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
				+				&child_bp, XFS_ATTR_FORK);
				+			if (error)
				+				return error;
				+			xfs_trans_binval(*trans, child_bp);
				+		}
				+
				+		/*
				+		 * If we're not done, re-read the parent to get the next
				+		 * child block number.
				+		 */
				+		if (i + 1 < ichdr.count) {
				+			error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
				+						 &bp, XFS_ATTR_FORK);
				+			if (error)
				+				return error;
				+			/*
				+			 * The parent buffer was released above, so the old
				+			 * node/btree pointers must not be trusted any more.
				+			 * Recompute them from the buffer we just re-read
				+			 * before looking up the next child.
				+			 */
				+			node = bp->b_addr;
				+			btree = xfs_da3_node_tree_p(node);
				+			child_fsb = be32_to_cpu(btree[i + 1].before);
				+			xfs_trans_brelse(*trans, bp);
				+		}
				+		/*
				+		 * Atomically commit the whole invalidate stuff.
				+		 */
				+		error = xfs_trans_roll(trans, dp);
				+		if (error)
				+			return  error;
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Indiscriminately delete the entire attribute fork.
				+ *
				+ * Reads attr-fork block 0, dispatches on its magic number to invalidate
				+ * everything underneath it (a node tree via xfs_attr3_node_inactive(), or a
				+ * lone leaf via xfs_attr3_leaf_inactive()), then invalidates block 0 itself
				+ * and rolls the transaction.  This is a depth-first traversal.
				+ *
				+ * Returns 0 on success, EIO if block 0 carries an unrecognised magic
				+ * number, or the first error returned by a callee.
				+ */
				+int
				+xfs_attr3_root_inactive(
				+	struct xfs_trans	**trans,
				+	struct xfs_inode	*dp)
				+{
				+	struct xfs_da_blkinfo	*hdr;
				+	struct xfs_buf		*root_bp;
				+	xfs_daddr_t		root_daddr;
				+	int			error;
				+
				+	/*
				+	 * Block 0 must still be mapped: we only get here if the fork has
				+	 * extents, and extents are removed in reverse order.
				+	 */
				+	error = xfs_da3_node_read(*trans, dp, 0, -1, &root_bp, XFS_ATTR_FORK);
				+	if (error)
				+		return error;
				+
				+	/* Remember the disk address; the callees below release root_bp. */
				+	root_daddr = root_bp->b_bn;
				+	hdr = root_bp->b_addr;
				+
				+	/*
				+	 * Tear down whatever hangs off the root, even when the "tree" is
				+	 * nothing more than a single leaf block.
				+	 */
				+	if (hdr->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
				+	    hdr->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
				+		error = xfs_attr3_node_inactive(trans, dp, root_bp, 1);
				+	} else if (hdr->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
				+		   hdr->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
				+		error = xfs_attr3_leaf_inactive(trans, dp, root_bp);
				+	} else {
				+		/* Not a block type we know how to walk. */
				+		error = XFS_ERROR(EIO);
				+		xfs_trans_brelse(*trans, root_bp);
				+	}
				+	if (error)
				+		return error;
				+
				+	/*
				+	 * Re-grab the root block and invalidate the incore copy so it is
				+	 * removed from the cache.
				+	 */
				+	error = xfs_da_get_buf(*trans, dp, 0, root_daddr, &root_bp,
				+			       XFS_ATTR_FORK);
				+	if (error)
				+		return error;
				+	xfs_trans_binval(*trans, root_bp);
				+
				+	/* Commit the invalidate and start the next transaction. */
				+	return xfs_trans_roll(trans, dp);
				+}
			
 
				+
			
 
				+/*
				+ * Throw away the whole attribute fork of inode @dp.
				+ *
				+ * Nothing is done (and 0 is returned) when the inode has no attributes or
				+ * its attr fork is in local format.  Otherwise a transaction is set up, the
				+ * attr btree/leaf blocks are invalidated through xfs_attr3_root_inactive()
				+ * and the fork's extents are truncated to zero.
				+ *
				+ * Returns 0 on success or the first error encountered; on error the
				+ * transaction is cancelled with XFS_TRANS_ABORT.
				+ */
				+int
				+xfs_attr_inactive(xfs_inode_t *dp)
				+{
				+	xfs_mount_t	*mp = dp->i_mount;
				+	xfs_trans_t	*trans;
				+	int		nothing_to_do;
				+	int		error;
				+
				+	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
				+
				+	/* Cheap early-out, checked under the shared inode lock. */
				+	xfs_ilock(dp, XFS_ILOCK_SHARED);
				+	nothing_to_do = !xfs_inode_hasattr(dp) ||
				+			dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL;
				+	xfs_iunlock(dp, XFS_ILOCK_SHARED);
				+	if (nothing_to_do)
				+		return 0;
				+
				+	/*
				+	 * Start our first transaction of the day.
				+	 *
				+	 * Every later transaction in this operation is obtained by rolling
				+	 * this one (the helpers receive &trans), and the inode stays joined
				+	 * to the chain throughout.
				+	 */
				+	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
				+	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
				+	if (error) {
				+		/* Inode lock not taken yet, so only the transaction to undo. */
				+		xfs_trans_cancel(trans, 0);
				+		return error;
				+	}
				+	xfs_ilock(dp, XFS_ILOCK_EXCL);
				+
				+	/*
				+	 * No need to make quota reservations here. We expect to release some
				+	 * blocks, not allocate, in the common case.
				+	 */
				+	xfs_trans_ijoin(trans, dp, 0);
				+
				+	/*
				+	 * The shared lock was dropped above, so repeat the "anything to
				+	 * do?" test now that the inode is held exclusively.
				+	 */
				+	if (!xfs_inode_hasattr(dp) ||
				+	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
				+		error = 0;
				+		goto out_cancel;
				+	}
				+
				+	/* Invalidate all attr blocks, then free the fork's extents. */
				+	error = xfs_attr3_root_inactive(&trans, dp);
				+	if (error)
				+		goto out_cancel;
				+
				+	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
				+	if (error)
				+		goto out_cancel;
				+
				+	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
				+	goto out_unlock;
				+
				+out_cancel:
				+	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
				+out_unlock:
				+	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				+	return error;
				+}
			
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -22,6 +22,7 @@
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
@@ -77,16 +78,6 @@ STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
 
				 			int *number_entries_in_blk1,
			
 
				 			int *number_usedbytes_in_blk1);
			
 
				 
			
 
				-/*
			
 
				- * Routines used for shrinking the Btree.
			
 
				- */
			
 
				-STATIC int xfs_attr3_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
			
 
				-				  struct xfs_buf *bp, int level);
			
 
				-STATIC int xfs_attr3_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
			
 
				-				  struct xfs_buf *bp);
			
 
				-STATIC int xfs_attr3_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
			
 
				-				   xfs_dablk_t blkno, int blkcnt);
			
 
				-
			
 
				 /*
			
 
				  * Utility routines.
			
 
				  */
			
@@ -635,7 +626,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 
				 	xfs_attr_sf_entry_t *sfe;
			
 
				 	int i;
			
 
				 
			
 
				-	ASSERT(args->dp->i_d.di_aformat == XFS_IFINLINE);
			
 
				+	ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
			
 
				 	sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
			
 
				 	sfe = &sf->list[0];
			
 
				 	for (i = 0; i < sf->hdr.count;
			
@@ -751,182 +742,6 @@ out:
 
				 	return(error);
			
 
				 }
			
 
				 
			
 
				-STATIC int
			
 
				-xfs_attr_shortform_compare(const void *a, const void *b)
			
 
				-{
			
 
				-	xfs_attr_sf_sort_t *sa, *sb;
			
 
				-
			
 
				-	sa = (xfs_attr_sf_sort_t *)a;
			
 
				-	sb = (xfs_attr_sf_sort_t *)b;
			
 
				-	if (sa->hash < sb->hash) {
			
 
				-		return(-1);
			
 
				-	} else if (sa->hash > sb->hash) {
			
 
				-		return(1);
			
 
				-	} else {
			
 
				-		return(sa->entno - sb->entno);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-
			
 
				-#define XFS_ISRESET_CURSOR(cursor) \
			
 
				-	(!((cursor)->initted) && !((cursor)->hashval) && \
			
 
				-	 !((cursor)->blkno) && !((cursor)->offset))
			
 
				-/*
			
 
				- * Copy out entries of shortform attribute lists for attr_list().
			
 
				- * Shortform attribute lists are not stored in hashval sorted order.
			
 
				- * If the output buffer is not large enough to hold them all, then we
			
 
				- * we have to calculate each entries' hashvalue and sort them before
			
 
				- * we can begin returning them to the user.
			
 
				- */
			
 
				-/*ARGSUSED*/
			
 
				-int
			
 
				-xfs_attr_shortform_list(xfs_attr_list_context_t *context)
			
 
				-{
			
 
				-	attrlist_cursor_kern_t *cursor;
			
 
				-	xfs_attr_sf_sort_t *sbuf, *sbp;
			
 
				-	xfs_attr_shortform_t *sf;
			
 
				-	xfs_attr_sf_entry_t *sfe;
			
 
				-	xfs_inode_t *dp;
			
 
				-	int sbsize, nsbuf, count, i;
			
 
				-	int error;
			
 
				-
			
 
				-	ASSERT(context != NULL);
			
 
				-	dp = context->dp;
			
 
				-	ASSERT(dp != NULL);
			
 
				-	ASSERT(dp->i_afp != NULL);
			
 
				-	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
			
 
				-	ASSERT(sf != NULL);
			
 
				-	if (!sf->hdr.count)
			
 
				-		return(0);
			
 
				-	cursor = context->cursor;
			
 
				-	ASSERT(cursor != NULL);
			
 
				-
			
 
				-	trace_xfs_attr_list_sf(context);
			
 
				-
			
 
				-	/*
			
 
				-	 * If the buffer is large enough and the cursor is at the start,
			
 
				-	 * do not bother with sorting since we will return everything in
			
 
				-	 * one buffer and another call using the cursor won't need to be
			
 
				-	 * made.
			
 
				-	 * Note the generous fudge factor of 16 overhead bytes per entry.
			
 
				-	 * If bufsize is zero then put_listent must be a search function
			
 
				-	 * and can just scan through what we have.
			
 
				-	 */
			
 
				-	if (context->bufsize == 0 ||
			
 
				-	    (XFS_ISRESET_CURSOR(cursor) &&
			
 
				-             (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
			
 
				-		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
			
 
				-			error = context->put_listent(context,
			
 
				-					   sfe->flags,
			
 
				-					   sfe->nameval,
			
 
				-					   (int)sfe->namelen,
			
 
				-					   (int)sfe->valuelen,
			
 
				-					   &sfe->nameval[sfe->namelen]);
			
 
				-
			
 
				-			/*
			
 
				-			 * Either search callback finished early or
			
 
				-			 * didn't fit it all in the buffer after all.
			
 
				-			 */
			
 
				-			if (context->seen_enough)
			
 
				-				break;
			
 
				-
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
			
 
				-		}
			
 
				-		trace_xfs_attr_list_sf_all(context);
			
 
				-		return(0);
			
 
				-	}
			
 
				-
			
 
				-	/* do no more for a search callback */
			
 
				-	if (context->bufsize == 0)
			
 
				-		return 0;
			
 
				-
			
 
				-	/*
			
 
				-	 * It didn't all fit, so we have to sort everything on hashval.
			
 
				-	 */
			
 
				-	sbsize = sf->hdr.count * sizeof(*sbuf);
			
 
				-	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
			
 
				-
			
 
				-	/*
			
 
				-	 * Scan the attribute list for the rest of the entries, storing
			
 
				-	 * the relevant info from only those that match into a buffer.
			
 
				-	 */
			
 
				-	nsbuf = 0;
			
 
				-	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
			
 
				-		if (unlikely(
			
 
				-		    ((char *)sfe < (char *)sf) ||
			
 
				-		    ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) {
			
 
				-			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
			
 
				-					     XFS_ERRLEVEL_LOW,
			
 
				-					     context->dp->i_mount, sfe);
			
 
				-			kmem_free(sbuf);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				-		}
			
 
				-
			
 
				-		sbp->entno = i;
			
 
				-		sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
			
 
				-		sbp->name = sfe->nameval;
			
 
				-		sbp->namelen = sfe->namelen;
			
 
				-		/* These are bytes, and both on-disk, don't endian-flip */
			
 
				-		sbp->valuelen = sfe->valuelen;
			
 
				-		sbp->flags = sfe->flags;
			
 
				-		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
			
 
				-		sbp++;
			
 
				-		nsbuf++;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Sort the entries on hash then entno.
			
 
				-	 */
			
 
				-	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
			
 
				-
			
 
				-	/*
			
 
				-	 * Re-find our place IN THE SORTED LIST.
			
 
				-	 */
			
 
				-	count = 0;
			
 
				-	cursor->initted = 1;
			
 
				-	cursor->blkno = 0;
			
 
				-	for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) {
			
 
				-		if (sbp->hash == cursor->hashval) {
			
 
				-			if (cursor->offset == count) {
			
 
				-				break;
			
 
				-			}
			
 
				-			count++;
			
 
				-		} else if (sbp->hash > cursor->hashval) {
			
 
				-			break;
			
 
				-		}
			
 
				-	}
			
 
				-	if (i == nsbuf) {
			
 
				-		kmem_free(sbuf);
			
 
				-		return(0);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Loop putting entries into the user buffer.
			
 
				-	 */
			
 
				-	for ( ; i < nsbuf; i++, sbp++) {
			
 
				-		if (cursor->hashval != sbp->hash) {
			
 
				-			cursor->hashval = sbp->hash;
			
 
				-			cursor->offset = 0;
			
 
				-		}
			
 
				-		error = context->put_listent(context,
			
 
				-					sbp->flags,
			
 
				-					sbp->name,
			
 
				-					sbp->namelen,
			
 
				-					sbp->valuelen,
			
 
				-					&sbp->name[sbp->namelen]);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-		if (context->seen_enough)
			
 
				-			break;
			
 
				-		cursor->offset++;
			
 
				-	}
			
 
				-
			
 
				-	kmem_free(sbuf);
			
 
				-	return(0);
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Check a leaf attribute block to see if all the entries would fit into
			
 
				  * a shortform attribute list.
			
@@ -1121,7 +936,6 @@ out:
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 /*========================================================================
			
 
				  * Routines used for growing the Btree.
			
 
				  *========================================================================*/
			
@@ -1482,7 +1296,6 @@ xfs_attr3_leaf_compact(
 
				 	ichdr_dst->freemap[0].size = ichdr_dst->firstused -
			
 
				 						ichdr_dst->freemap[0].base;
			
 
				 
			
 
				-
			
 
				 	/* write the header back to initialise the underlying buffer */
			
 
				 	xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
			
 
				 
			
@@ -2643,130 +2456,6 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
 
				 	return size;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Copy out attribute list entries for attr_list(), for leaf attribute lists.
			
 
				- */
			
 
				-int
			
 
				-xfs_attr3_leaf_list_int(
			
 
				-	struct xfs_buf			*bp,
			
 
				-	struct xfs_attr_list_context	*context)
			
 
				-{
			
 
				-	struct attrlist_cursor_kern	*cursor;
			
 
				-	struct xfs_attr_leafblock	*leaf;
			
 
				-	struct xfs_attr3_icleaf_hdr	ichdr;
			
 
				-	struct xfs_attr_leaf_entry	*entries;
			
 
				-	struct xfs_attr_leaf_entry	*entry;
			
 
				-	int				retval;
			
 
				-	int				i;
			
 
				-
			
 
				-	trace_xfs_attr_list_leaf(context);
			
 
				-
			
 
				-	leaf = bp->b_addr;
			
 
				-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
			
 
				-	entries = xfs_attr3_leaf_entryp(leaf);
			
 
				-
			
 
				-	cursor = context->cursor;
			
 
				-	cursor->initted = 1;
			
 
				-
			
 
				-	/*
			
 
				-	 * Re-find our place in the leaf block if this is a new syscall.
			
 
				-	 */
			
 
				-	if (context->resynch) {
			
 
				-		entry = &entries[0];
			
 
				-		for (i = 0; i < ichdr.count; entry++, i++) {
			
 
				-			if (be32_to_cpu(entry->hashval) == cursor->hashval) {
			
 
				-				if (cursor->offset == context->dupcnt) {
			
 
				-					context->dupcnt = 0;
			
 
				-					break;
			
 
				-				}
			
 
				-				context->dupcnt++;
			
 
				-			} else if (be32_to_cpu(entry->hashval) >
			
 
				-					cursor->hashval) {
			
 
				-				context->dupcnt = 0;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		if (i == ichdr.count) {
			
 
				-			trace_xfs_attr_list_notfound(context);
			
 
				-			return 0;
			
 
				-		}
			
 
				-	} else {
			
 
				-		entry = &entries[0];
			
 
				-		i = 0;
			
 
				-	}
			
 
				-	context->resynch = 0;
			
 
				-
			
 
				-	/*
			
 
				-	 * We have found our place, start copying out the new attributes.
			
 
				-	 */
			
 
				-	retval = 0;
			
 
				-	for (; i < ichdr.count; entry++, i++) {
			
 
				-		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
			
 
				-			cursor->hashval = be32_to_cpu(entry->hashval);
			
 
				-			cursor->offset = 0;
			
 
				-		}
			
 
				-
			
 
				-		if (entry->flags & XFS_ATTR_INCOMPLETE)
			
 
				-			continue;		/* skip incomplete entries */
			
 
				-
			
 
				-		if (entry->flags & XFS_ATTR_LOCAL) {
			
 
				-			xfs_attr_leaf_name_local_t *name_loc =
			
 
				-				xfs_attr3_leaf_name_local(leaf, i);
			
 
				-
			
 
				-			retval = context->put_listent(context,
			
 
				-						entry->flags,
			
 
				-						name_loc->nameval,
			
 
				-						(int)name_loc->namelen,
			
 
				-						be16_to_cpu(name_loc->valuelen),
			
 
				-						&name_loc->nameval[name_loc->namelen]);
			
 
				-			if (retval)
			
 
				-				return retval;
			
 
				-		} else {
			
 
				-			xfs_attr_leaf_name_remote_t *name_rmt =
			
 
				-				xfs_attr3_leaf_name_remote(leaf, i);
			
 
				-
			
 
				-			int valuelen = be32_to_cpu(name_rmt->valuelen);
			
 
				-
			
 
				-			if (context->put_value) {
			
 
				-				xfs_da_args_t args;
			
 
				-
			
 
				-				memset((char *)&args, 0, sizeof(args));
			
 
				-				args.dp = context->dp;
			
 
				-				args.whichfork = XFS_ATTR_FORK;
			
 
				-				args.valuelen = valuelen;
			
 
				-				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
			
 
				-				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
			
 
				-				args.rmtblkcnt = xfs_attr3_rmt_blocks(
			
 
				-							args.dp->i_mount, valuelen);
			
 
				-				retval = xfs_attr_rmtval_get(&args);
			
 
				-				if (retval)
			
 
				-					return retval;
			
 
				-				retval = context->put_listent(context,
			
 
				-						entry->flags,
			
 
				-						name_rmt->name,
			
 
				-						(int)name_rmt->namelen,
			
 
				-						valuelen,
			
 
				-						args.value);
			
 
				-				kmem_free(args.value);
			
 
				-			} else {
			
 
				-				retval = context->put_listent(context,
			
 
				-						entry->flags,
			
 
				-						name_rmt->name,
			
 
				-						(int)name_rmt->namelen,
			
 
				-						valuelen,
			
 
				-						NULL);
			
 
				-			}
			
 
				-			if (retval)
			
 
				-				return retval;
			
 
				-		}
			
 
				-		if (context->seen_enough)
			
 
				-			break;
			
 
				-		cursor->offset++;
			
 
				-	}
			
 
				-	trace_xfs_attr_list_leaf_end(context);
			
 
				-	return retval;
			
 
				-}
			
 
				-
			
 
				 
			
 
				 /*========================================================================
			
 
				  * Manage the INCOMPLETE flag in a leaf entry
			
@@ -3011,345 +2700,3 @@ xfs_attr3_leaf_flipflags(
 
				 
			
 
				 	return error;
			
 
				 }
			
 
				-
			
 
				-/*========================================================================
			
 
				- * Indiscriminately delete the entire attribute fork
			
 
				- *========================================================================*/
			
 
				-
			
 
				-/*
			
 
				- * Recurse (gasp!) through the attribute nodes until we find leaves.
			
 
				- * We're doing a depth-first traversal in order to invalidate everything.
			
 
				- */
			
 
				-int
			
 
				-xfs_attr3_root_inactive(
			
 
				-	struct xfs_trans	**trans,
			
 
				-	struct xfs_inode	*dp)
			
 
				-{
			
 
				-	struct xfs_da_blkinfo	*info;
			
 
				-	struct xfs_buf		*bp;
			
 
				-	xfs_daddr_t		blkno;
			
 
				-	int			error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Read block 0 to see what we have to work with.
			
 
				-	 * We only get here if we have extents, since we remove
			
 
				-	 * the extents in reverse order the extent containing
			
 
				-	 * block 0 must still be there.
			
 
				-	 */
			
 
				-	error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-	blkno = bp->b_bn;
			
 
				-
			
 
				-	/*
			
 
				-	 * Invalidate the tree, even if the "tree" is only a single leaf block.
			
 
				-	 * This is a depth-first traversal!
			
 
				-	 */
			
 
				-	info = bp->b_addr;
			
 
				-	switch (info->magic) {
			
 
				-	case cpu_to_be16(XFS_DA_NODE_MAGIC):
			
 
				-	case cpu_to_be16(XFS_DA3_NODE_MAGIC):
			
 
				-		error = xfs_attr3_node_inactive(trans, dp, bp, 1);
			
 
				-		break;
			
 
				-	case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
			
 
				-	case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
			
 
				-		error = xfs_attr3_leaf_inactive(trans, dp, bp);
			
 
				-		break;
			
 
				-	default:
			
 
				-		error = XFS_ERROR(EIO);
			
 
				-		xfs_trans_brelse(*trans, bp);
			
 
				-		break;
			
 
				-	}
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Invalidate the incore copy of the root block.
			
 
				-	 */
			
 
				-	error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-	xfs_trans_binval(*trans, bp);	/* remove from cache */
			
 
				-	/*
			
 
				-	 * Commit the invalidate and start the next transaction.
			
 
				-	 */
			
 
				-	error = xfs_trans_roll(trans, dp);
			
 
				-
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Recurse (gasp!) through the attribute nodes until we find leaves.
			
 
				- * We're doing a depth-first traversal in order to invalidate everything.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr3_node_inactive(
			
 
				-	struct xfs_trans **trans,
			
 
				-	struct xfs_inode *dp,
			
 
				-	struct xfs_buf	*bp,
			
 
				-	int		level)
			
 
				-{
			
 
				-	xfs_da_blkinfo_t *info;
			
 
				-	xfs_da_intnode_t *node;
			
 
				-	xfs_dablk_t child_fsb;
			
 
				-	xfs_daddr_t parent_blkno, child_blkno;
			
 
				-	int error, i;
			
 
				-	struct xfs_buf *child_bp;
			
 
				-	struct xfs_da_node_entry *btree;
			
 
				-	struct xfs_da3_icnode_hdr ichdr;
			
 
				-
			
 
				-	/*
			
 
				-	 * Since this code is recursive (gasp!) we must protect ourselves.
			
 
				-	 */
			
 
				-	if (level > XFS_DA_NODE_MAXDEPTH) {
			
 
				-		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
			
 
				-		return XFS_ERROR(EIO);
			
 
				-	}
			
 
				-
			
 
				-	node = bp->b_addr;
			
 
				-	xfs_da3_node_hdr_from_disk(&ichdr, node);
			
 
				-	parent_blkno = bp->b_bn;
			
 
				-	if (!ichdr.count) {
			
 
				-		xfs_trans_brelse(*trans, bp);
			
 
				-		return 0;
			
 
				-	}
			
 
				-	btree = xfs_da3_node_tree_p(node);
			
 
				-	child_fsb = be32_to_cpu(btree[0].before);
			
 
				-	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
			
 
				-
			
 
				-	/*
			
 
				-	 * If this is the node level just above the leaves, simply loop
			
 
				-	 * over the leaves removing all of them.  If this is higher up
			
 
				-	 * in the tree, recurse downward.
			
 
				-	 */
			
 
				-	for (i = 0; i < ichdr.count; i++) {
			
 
				-		/*
			
 
				-		 * Read the subsidiary block to see what we have to work with.
			
 
				-		 * Don't do this in a transaction.  This is a depth-first
			
 
				-		 * traversal of the tree so we may deal with many blocks
			
 
				-		 * before we come back to this one.
			
 
				-		 */
			
 
				-		error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
			
 
				-						XFS_ATTR_FORK);
			
 
				-		if (error)
			
 
				-			return(error);
			
 
				-		if (child_bp) {
			
 
				-						/* save for re-read later */
			
 
				-			child_blkno = XFS_BUF_ADDR(child_bp);
			
 
				-
			
 
				-			/*
			
 
				-			 * Invalidate the subtree, however we have to.
			
 
				-			 */
			
 
				-			info = child_bp->b_addr;
			
 
				-			switch (info->magic) {
			
 
				-			case cpu_to_be16(XFS_DA_NODE_MAGIC):
			
 
				-			case cpu_to_be16(XFS_DA3_NODE_MAGIC):
			
 
				-				error = xfs_attr3_node_inactive(trans, dp,
			
 
				-							child_bp, level + 1);
			
 
				-				break;
			
 
				-			case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
			
 
				-			case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
			
 
				-				error = xfs_attr3_leaf_inactive(trans, dp,
			
 
				-							child_bp);
			
 
				-				break;
			
 
				-			default:
			
 
				-				error = XFS_ERROR(EIO);
			
 
				-				xfs_trans_brelse(*trans, child_bp);
			
 
				-				break;
			
 
				-			}
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-
			
 
				-			/*
			
 
				-			 * Remove the subsidiary block from the cache
			
 
				-			 * and from the log.
			
 
				-			 */
			
 
				-			error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
			
 
				-				&child_bp, XFS_ATTR_FORK);
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-			xfs_trans_binval(*trans, child_bp);
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If we're not done, re-read the parent to get the next
			
 
				-		 * child block number.
			
 
				-		 */
			
 
				-		if (i + 1 < ichdr.count) {
			
 
				-			error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
			
 
				-						 &bp, XFS_ATTR_FORK);
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-			child_fsb = be32_to_cpu(btree[i + 1].before);
			
 
				-			xfs_trans_brelse(*trans, bp);
			
 
				-		}
			
 
				-		/*
			
 
				-		 * Atomically commit the whole invalidate stuff.
			
 
				-		 */
			
 
				-		error = xfs_trans_roll(trans, dp);
			
 
				-		if (error)
			
 
				-			return  error;
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Invalidate all of the "remote" value regions pointed to by a particular
			
 
				- * leaf block.
			
 
				- * Note that we must release the lock on the buffer so that we are not
			
 
				- * caught holding something that the logging code wants to flush to disk.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr3_leaf_inactive(
			
 
				-	struct xfs_trans	**trans,
			
 
				-	struct xfs_inode	*dp,
			
 
				-	struct xfs_buf		*bp)
			
 
				-{
			
 
				-	struct xfs_attr_leafblock *leaf;
			
 
				-	struct xfs_attr3_icleaf_hdr ichdr;
			
 
				-	struct xfs_attr_leaf_entry *entry;
			
 
				-	struct xfs_attr_leaf_name_remote *name_rmt;
			
 
				-	struct xfs_attr_inactive_list *list;
			
 
				-	struct xfs_attr_inactive_list *lp;
			
 
				-	int			error;
			
 
				-	int			count;
			
 
				-	int			size;
			
 
				-	int			tmp;
			
 
				-	int			i;
			
 
				-
			
 
				-	leaf = bp->b_addr;
			
 
				-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
			
 
				-
			
 
				-	/*
			
 
				-	 * Count the number of "remote" value extents.
			
 
				-	 */
			
 
				-	count = 0;
			
 
				-	entry = xfs_attr3_leaf_entryp(leaf);
			
 
				-	for (i = 0; i < ichdr.count; entry++, i++) {
			
 
				-		if (be16_to_cpu(entry->nameidx) &&
			
 
				-		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
			
 
				-			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
			
 
				-			if (name_rmt->valueblk)
			
 
				-				count++;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * If there are no "remote" values, we're done.
			
 
				-	 */
			
 
				-	if (count == 0) {
			
 
				-		xfs_trans_brelse(*trans, bp);
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Allocate storage for a list of all the "remote" value extents.
			
 
				-	 */
			
 
				-	size = count * sizeof(xfs_attr_inactive_list_t);
			
 
				-	list = kmem_alloc(size, KM_SLEEP);
			
 
				-
			
 
				-	/*
			
 
				-	 * Identify each of the "remote" value extents.
			
 
				-	 */
			
 
				-	lp = list;
			
 
				-	entry = xfs_attr3_leaf_entryp(leaf);
			
 
				-	for (i = 0; i < ichdr.count; entry++, i++) {
			
 
				-		if (be16_to_cpu(entry->nameidx) &&
			
 
				-		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
			
 
				-			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
			
 
				-			if (name_rmt->valueblk) {
			
 
				-				lp->valueblk = be32_to_cpu(name_rmt->valueblk);
			
 
				-				lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
			
 
				-						    be32_to_cpu(name_rmt->valuelen));
			
 
				-				lp++;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	xfs_trans_brelse(*trans, bp);	/* unlock for trans. in freextent() */
			
 
				-
			
 
				-	/*
			
 
				-	 * Invalidate each of the "remote" value extents.
			
 
				-	 */
			
 
				-	error = 0;
			
 
				-	for (lp = list, i = 0; i < count; i++, lp++) {
			
 
				-		tmp = xfs_attr3_leaf_freextent(trans, dp,
			
 
				-				lp->valueblk, lp->valuelen);
			
 
				-
			
 
				-		if (error == 0)
			
 
				-			error = tmp;	/* save only the 1st errno */
			
 
				-	}
			
 
				-
			
 
				-	kmem_free(list);
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Look at all the extents for this logical region,
			
 
				- * invalidate any buffers that are incore/in transactions.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr3_leaf_freextent(
			
 
				-	struct xfs_trans	**trans,
			
 
				-	struct xfs_inode	*dp,
			
 
				-	xfs_dablk_t		blkno,
			
 
				-	int			blkcnt)
			
 
				-{
			
 
				-	struct xfs_bmbt_irec	map;
			
 
				-	struct xfs_buf		*bp;
			
 
				-	xfs_dablk_t		tblkno;
			
 
				-	xfs_daddr_t		dblkno;
			
 
				-	int			tblkcnt;
			
 
				-	int			dblkcnt;
			
 
				-	int			nmap;
			
 
				-	int			error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Roll through the "value", invalidating the attribute value's
			
 
				-	 * blocks.
			
 
				-	 */
			
 
				-	tblkno = blkno;
			
 
				-	tblkcnt = blkcnt;
			
 
				-	while (tblkcnt > 0) {
			
 
				-		/*
			
 
				-		 * Try to remember where we decided to put the value.
			
 
				-		 */
			
 
				-		nmap = 1;
			
 
				-		error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
			
 
				-				       &map, &nmap, XFS_BMAPI_ATTRFORK);
			
 
				-		if (error) {
			
 
				-			return(error);
			
 
				-		}
			
 
				-		ASSERT(nmap == 1);
			
 
				-		ASSERT(map.br_startblock != DELAYSTARTBLOCK);
			
 
				-
			
 
				-		/*
			
 
				-		 * If it's a hole, these are already unmapped
			
 
				-		 * so there's nothing to invalidate.
			
 
				-		 */
			
 
				-		if (map.br_startblock != HOLESTARTBLOCK) {
			
 
				-
			
 
				-			dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
			
 
				-						  map.br_startblock);
			
 
				-			dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
			
 
				-						map.br_blockcount);
			
 
				-			bp = xfs_trans_get_buf(*trans,
			
 
				-					dp->i_mount->m_ddev_targp,
			
 
				-					dblkno, dblkcnt, 0);
			
 
				-			if (!bp)
			
 
				-				return ENOMEM;
			
 
				-			xfs_trans_binval(*trans, bp);
			
 
				-			/*
			
 
				-			 * Roll to next transaction.
			
 
				-			 */
			
 
				-			error = xfs_trans_roll(trans, dp);
			
 
				-			if (error)
			
 
				-				return (error);
			
 
				-		}
			
 
				-
			
 
				-		tblkno += map.br_blockcount;
			
 
				-		tblkcnt -= map.br_blockcount;
			
 
				-	}
			
 
				-
			
 
				-	return(0);
			
 
				-}
			
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -333,6 +333,8 @@ int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
 
				 			struct xfs_buf **bpp);
			
 
				 void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
			
 
				 				     struct xfs_attr_leafblock *from);
			
 
				+void	xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
			
 
				+				   struct xfs_attr3_icleaf_hdr *from);
			
 
				 
			
 
				 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
			
 
				 
			
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -0,0 +1,655 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_types.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_alloc_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_btree.h"
			
 
				+#include "xfs_attr_sf.h"
			
 
				+#include "xfs_attr_remote.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_inode_item.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_attr.h"
			
 
				+#include "xfs_attr_leaf.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+
			
 
				+/*
				+ * Three-way comparison of two xfs_attr_sf_sort_t records.
				+ * Records are ordered by hash first; records with equal hashes are
				+ * ordered by their entry number.  Returns a negative, zero or positive
				+ * value accordingly.
				+ */
				+STATIC int
				+xfs_attr_shortform_compare(const void *a, const void *b)
				+{
				+	const xfs_attr_sf_sort_t *lhs = a;
				+	const xfs_attr_sf_sort_t *rhs = b;
				+
				+	if (lhs->hash != rhs->hash)
				+		return lhs->hash < rhs->hash ? -1 : 1;
				+	return lhs->entno - rhs->entno;
				+}
			
 
				+
			
 
				+/* True when the initted, hashval, blkno and offset cursor fields are all zero. */
				+#define XFS_ISRESET_CURSOR(cursor) \
				+	(!((cursor)->initted) && !((cursor)->hashval) && \
				+	 !((cursor)->blkno) && !((cursor)->offset))
				+
				+/*
				+ * Copy out entries of shortform attribute lists for attr_list().
				+ * Shortform attribute lists are not stored in hashval sorted order.
				+ * If the output buffer is not large enough to hold them all, then
				+ * we have to calculate each entry's hash value and sort them before
				+ * we can begin returning them to the user.
				+ *
				+ * Returns 0 on success (including when there is nothing to list), the
				+ * error returned by context->put_listent, or EFSCORRUPTED when an entry
				+ * lies outside the attribute fork data.  The temporary sort buffer is
				+ * released on every exit path.
				+ */
				+int
				+xfs_attr_shortform_list(xfs_attr_list_context_t *context)
				+{
				+	attrlist_cursor_kern_t *cursor;
				+	xfs_attr_sf_sort_t *sbuf, *sbp;
				+	xfs_attr_shortform_t *sf;
				+	xfs_attr_sf_entry_t *sfe;
				+	xfs_inode_t *dp;
				+	int sbsize, nsbuf, count, i;
				+	int error = 0;
				+
				+	ASSERT(context != NULL);
				+	dp = context->dp;
				+	ASSERT(dp != NULL);
				+	ASSERT(dp->i_afp != NULL);
				+	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
				+	ASSERT(sf != NULL);
				+	if (!sf->hdr.count)
				+		return 0;
				+	cursor = context->cursor;
				+	ASSERT(cursor != NULL);
				+
				+	trace_xfs_attr_list_sf(context);
				+
				+	/*
				+	 * If the buffer is large enough and the cursor is at the start,
				+	 * do not bother with sorting since we will return everything in
				+	 * one buffer and another call using the cursor won't need to be
				+	 * made.
				+	 * Note the generous fudge factor of 16 overhead bytes per entry.
				+	 * If bufsize is zero then put_listent must be a search function
				+	 * and can just scan through what we have.
				+	 */
				+	if (context->bufsize == 0 ||
				+	    (XFS_ISRESET_CURSOR(cursor) &&
				+	     (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
				+		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
				+			error = context->put_listent(context,
				+					   sfe->flags,
				+					   sfe->nameval,
				+					   (int)sfe->namelen,
				+					   (int)sfe->valuelen,
				+					   &sfe->nameval[sfe->namelen]);
				+
				+			/*
				+			 * Either search callback finished early or
				+			 * didn't fit it all in the buffer after all.
				+			 */
				+			if (context->seen_enough)
				+				break;
				+
				+			if (error)
				+				return error;
				+			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
				+		}
				+		trace_xfs_attr_list_sf_all(context);
				+		return 0;
				+	}
				+
				+	/*
				+	 * It didn't all fit, so we have to sort everything on hashval.
				+	 * (A zero bufsize never gets here: the branch above always
				+	 * returns for it.)
				+	 */
				+	sbsize = sf->hdr.count * sizeof(*sbuf);
				+	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
				+
				+	/*
				+	 * Scan the attribute list for the rest of the entries, storing
				+	 * the relevant info from only those that match into a buffer.
				+	 */
				+	nsbuf = 0;
				+	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
				+		/* Refuse to follow an entry pointer that left the fork data. */
				+		if (unlikely(
				+		    ((char *)sfe < (char *)sf) ||
				+		    ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) {
				+			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
				+					     XFS_ERRLEVEL_LOW,
				+					     context->dp->i_mount, sfe);
				+			kmem_free(sbuf);
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+
				+		sbp->entno = i;
				+		sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
				+		sbp->name = sfe->nameval;
				+		sbp->namelen = sfe->namelen;
				+		/* These are bytes, and both on-disk, don't endian-flip */
				+		sbp->valuelen = sfe->valuelen;
				+		sbp->flags = sfe->flags;
				+		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
				+		sbp++;
				+		nsbuf++;
				+	}
				+
				+	/*
				+	 * Sort the entries on hash then entno.
				+	 */
				+	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
				+
				+	/*
				+	 * Re-find our place IN THE SORTED LIST.
				+	 */
				+	count = 0;
				+	cursor->initted = 1;
				+	cursor->blkno = 0;
				+	for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) {
				+		if (sbp->hash == cursor->hashval) {
				+			if (cursor->offset == count)
				+				break;
				+			count++;
				+		} else if (sbp->hash > cursor->hashval) {
				+			break;
				+		}
				+	}
				+	if (i == nsbuf) {
				+		kmem_free(sbuf);
				+		return 0;
				+	}
				+
				+	/*
				+	 * Loop putting entries into the user buffer.
				+	 */
				+	for ( ; i < nsbuf; i++, sbp++) {
				+		if (cursor->hashval != sbp->hash) {
				+			cursor->hashval = sbp->hash;
				+			cursor->offset = 0;
				+		}
				+		error = context->put_listent(context,
				+					sbp->flags,
				+					sbp->name,
				+					sbp->namelen,
				+					sbp->valuelen,
				+					&sbp->name[sbp->namelen]);
				+		/*
				+		 * Leave the loop rather than returning so that the sort
				+		 * buffer is freed below on the error path as well.
				+		 */
				+		if (error || context->seen_enough)
				+			break;
				+		cursor->offset++;
				+	}
				+
				+	kmem_free(sbuf);
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Copy out attribute list entries for attr_list() when the attribute
				+ * fork is a multi-block tree.
				+ *
				+ * The block recorded in the cursor is tried first.  If it cannot be
				+ * used (wrong block type, empty leaf, or the cursor hashval is outside
				+ * the leaf's hash range) the leaf is looked up again by hash value
				+ * from block 0.  Leaf blocks are then processed in order through their
				+ * forward links until the callback has seen enough or the last leaf
				+ * has been handled.
				+ *
				+ * Returns 0 on success, EFSCORRUPTED if a block met during the descent
				+ * has an unexpected magic number, or the error from a block read or
				+ * from xfs_attr3_leaf_list_int().
				+ */
				+STATIC int
				+xfs_attr_node_list(xfs_attr_list_context_t *context)
				+{
				+	attrlist_cursor_kern_t *cursor;
				+	xfs_attr_leafblock_t *leaf;
				+	xfs_da_intnode_t *node;
				+	struct xfs_attr3_icleaf_hdr leafhdr;
				+	struct xfs_da3_icnode_hdr nodehdr;
				+	struct xfs_da_node_entry *btree;
				+	int error, i;
				+	struct xfs_buf *bp;
				+
				+	trace_xfs_attr_node_list(context);
				+
				+	cursor = context->cursor;
				+	cursor->initted = 1;
				+
				+	/*
				+	 * Do all sorts of validation on the passed-in cursor structure.
				+	 * If anything is amiss, ignore the cursor and look up the hashval
				+	 * starting from the btree root.
				+	 */
				+	bp = NULL;
				+	if (cursor->blkno > 0) {
				+		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
				+					      &bp, XFS_ATTR_FORK);
				+		/* EFSCORRUPTED here is not fatal: fall back to the root. */
				+		if ((error != 0) && (error != EFSCORRUPTED))
				+			return error;
				+		if (bp) {
				+			struct xfs_attr_leaf_entry *entries;
				+
				+			node = bp->b_addr;
				+			switch (be16_to_cpu(node->hdr.info.magic)) {
				+			case XFS_DA_NODE_MAGIC:
				+			case XFS_DA3_NODE_MAGIC:
				+				trace_xfs_attr_list_wrong_blk(context);
				+				xfs_trans_brelse(NULL, bp);
				+				bp = NULL;
				+				break;
				+			case XFS_ATTR_LEAF_MAGIC:
				+			case XFS_ATTR3_LEAF_MAGIC:
				+				leaf = bp->b_addr;
				+				xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
				+				entries = xfs_attr3_leaf_entryp(leaf);
				+				/*
				+				 * An empty leaf has no last entry to compare
				+				 * against; check the count first so that
				+				 * entries[count - 1] is never read before the
				+				 * start of the entry array.
				+				 */
				+				if (leafhdr.count == 0 ||
				+				    cursor->hashval > be32_to_cpu(
				+						entries[leafhdr.count - 1].hashval) ||
				+				    cursor->hashval <= be32_to_cpu(
				+						entries[0].hashval)) {
				+					trace_xfs_attr_list_wrong_blk(context);
				+					xfs_trans_brelse(NULL, bp);
				+					bp = NULL;
				+				}
				+				break;
				+			default:
				+				trace_xfs_attr_list_wrong_blk(context);
				+				xfs_trans_brelse(NULL, bp);
				+				bp = NULL;
				+			}
				+		}
				+	}
				+
				+	/*
				+	 * We did not find what we expected given the cursor's contents,
				+	 * so we start from the top and work down based on the hash value.
				+	 * Note that start of node block is same as start of leaf block.
				+	 */
				+	if (bp == NULL) {
				+		cursor->blkno = 0;
				+		for (;;) {
				+			__uint16_t magic;
				+
				+			error = xfs_da3_node_read(NULL, context->dp,
				+						      cursor->blkno, -1, &bp,
				+						      XFS_ATTR_FORK);
				+			if (error)
				+				return error;
				+			node = bp->b_addr;
				+			magic = be16_to_cpu(node->hdr.info.magic);
				+			/* Reached a leaf: keep bp held for the walk below. */
				+			if (magic == XFS_ATTR_LEAF_MAGIC ||
				+			    magic == XFS_ATTR3_LEAF_MAGIC)
				+				break;
				+			if (magic != XFS_DA_NODE_MAGIC &&
				+			    magic != XFS_DA3_NODE_MAGIC) {
				+				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
				+						     XFS_ERRLEVEL_LOW,
				+						     context->dp->i_mount,
				+						     node);
				+				xfs_trans_brelse(NULL, bp);
				+				return XFS_ERROR(EFSCORRUPTED);
				+			}
				+
				+			/* Pick the first child whose hashval covers the cursor. */
				+			xfs_da3_node_hdr_from_disk(&nodehdr, node);
				+			btree = xfs_da3_node_tree_p(node);
				+			for (i = 0; i < nodehdr.count; btree++, i++) {
				+				if (cursor->hashval
				+						<= be32_to_cpu(btree->hashval)) {
				+					cursor->blkno = be32_to_cpu(btree->before);
				+					trace_xfs_attr_list_node_descend(context,
				+									 btree);
				+					break;
				+				}
				+			}
				+			/* No child covers the cursor hashval: nothing left to list. */
				+			if (i == nodehdr.count) {
				+				xfs_trans_brelse(NULL, bp);
				+				return 0;
				+			}
				+			xfs_trans_brelse(NULL, bp);
				+		}
				+	}
				+	ASSERT(bp != NULL);
				+
				+	/*
				+	 * Roll upward through the blocks, processing each leaf block in
				+	 * order.  As long as there is space in the result buffer, keep
				+	 * adding the information.
				+	 */
				+	for (;;) {
				+		leaf = bp->b_addr;
				+		error = xfs_attr3_leaf_list_int(bp, context);
				+		if (error) {
				+			xfs_trans_brelse(NULL, bp);
				+			return error;
				+		}
				+		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
				+		if (context->seen_enough || leafhdr.forw == 0)
				+			break;
				+		cursor->blkno = leafhdr.forw;
				+		xfs_trans_brelse(NULL, bp);
				+		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
				+					   &bp);
				+		if (error)
				+			return error;
				+	}
				+	xfs_trans_brelse(NULL, bp);
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Copy out attribute list entries for attr_list(), for leaf attribute lists.
				+ *
				+ * Walks the entries of the leaf block held in bp and passes each one
				+ * that is not marked XFS_ATTR_INCOMPLETE to context->put_listent.
				+ * When context->resynch is set, the starting entry is first re-located
				+ * from the cursor's hashval and offset.  For a remote value the data is
				+ * only read in when context->put_value is set; otherwise a NULL value
				+ * pointer is passed on.  The temporary value buffer is freed whether
				+ * or not the read succeeds.
				+ *
				+ * Returns 0, or the first non-zero result of xfs_attr_rmtval_get() or
				+ * context->put_listent().
				+ */
				+int
				+xfs_attr3_leaf_list_int(
				+	struct xfs_buf			*bp,
				+	struct xfs_attr_list_context	*context)
				+{
				+	struct attrlist_cursor_kern	*cursor;
				+	struct xfs_attr_leafblock	*leaf;
				+	struct xfs_attr3_icleaf_hdr	ichdr;
				+	struct xfs_attr_leaf_entry	*entries;
				+	struct xfs_attr_leaf_entry	*entry;
				+	int				retval;
				+	int				i;
				+
				+	trace_xfs_attr_list_leaf(context);
				+
				+	leaf = bp->b_addr;
				+	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
				+	entries = xfs_attr3_leaf_entryp(leaf);
				+
				+	cursor = context->cursor;
				+	cursor->initted = 1;
				+
				+	/*
				+	 * Re-find our place in the leaf block if this is a new syscall.
				+	 */
				+	if (context->resynch) {
				+		entry = &entries[0];
				+		for (i = 0; i < ichdr.count; entry++, i++) {
				+			if (be32_to_cpu(entry->hashval) == cursor->hashval) {
				+				if (cursor->offset == context->dupcnt) {
				+					context->dupcnt = 0;
				+					break;
				+				}
				+				context->dupcnt++;
				+			} else if (be32_to_cpu(entry->hashval) >
				+					cursor->hashval) {
				+				context->dupcnt = 0;
				+				break;
				+			}
				+		}
				+		if (i == ichdr.count) {
				+			trace_xfs_attr_list_notfound(context);
				+			return 0;
				+		}
				+	} else {
				+		entry = &entries[0];
				+		i = 0;
				+	}
				+	context->resynch = 0;
				+
				+	/*
				+	 * We have found our place, start copying out the new attributes.
				+	 */
				+	retval = 0;
				+	for (; i < ichdr.count; entry++, i++) {
				+		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
				+			cursor->hashval = be32_to_cpu(entry->hashval);
				+			cursor->offset = 0;
				+		}
				+
				+		if (entry->flags & XFS_ATTR_INCOMPLETE)
				+			continue;		/* skip incomplete entries */
				+
				+		if (entry->flags & XFS_ATTR_LOCAL) {
				+			xfs_attr_leaf_name_local_t *name_loc =
				+				xfs_attr3_leaf_name_local(leaf, i);
				+
				+			retval = context->put_listent(context,
				+						entry->flags,
				+						name_loc->nameval,
				+						(int)name_loc->namelen,
				+						be16_to_cpu(name_loc->valuelen),
				+						&name_loc->nameval[name_loc->namelen]);
				+			if (retval)
				+				return retval;
				+		} else {
				+			xfs_attr_leaf_name_remote_t *name_rmt =
				+				xfs_attr3_leaf_name_remote(leaf, i);
				+
				+			int valuelen = be32_to_cpu(name_rmt->valuelen);
				+
				+			if (context->put_value) {
				+				xfs_da_args_t args;
				+
				+				memset((char *)&args, 0, sizeof(args));
				+				args.dp = context->dp;
				+				args.whichfork = XFS_ATTR_FORK;
				+				args.valuelen = valuelen;
				+				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
				+				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
				+				args.rmtblkcnt = xfs_attr3_rmt_blocks(
				+							args.dp->i_mount, valuelen);
				+				retval = xfs_attr_rmtval_get(&args);
				+				/*
				+				 * Only hand the value on if it was read; the
				+				 * buffer is freed on success and failure alike
				+				 * so a failed read does not leak it.
				+				 */
				+				if (!retval)
				+					retval = context->put_listent(context,
				+							entry->flags,
				+							name_rmt->name,
				+							(int)name_rmt->namelen,
				+							valuelen,
				+							args.value);
				+				kmem_free(args.value);
				+			} else {
				+				retval = context->put_listent(context,
				+						entry->flags,
				+						name_rmt->name,
				+						(int)name_rmt->namelen,
				+						valuelen,
				+						NULL);
				+			}
				+			if (retval)
				+				return retval;
				+		}
				+		if (context->seen_enough)
				+			break;
				+		cursor->offset++;
				+	}
				+	trace_xfs_attr_list_leaf_end(context);
				+	return retval;
				+}
			
 
				+
			
 
				+/*
				+ * Copy out attribute entries for attr_list() when the attribute fork
				+ * is a single leaf block.
				+ *
				+ * Resets the cursor's block number to 0, reads leaf block 0 and lists
				+ * its entries through xfs_attr3_leaf_list_int().  The buffer is
				+ * released before returning.  The result of the read, or of the
				+ * listing when the read succeeded, is returned via XFS_ERROR().
				+ */
				+STATIC int
				+xfs_attr_leaf_list(xfs_attr_list_context_t *context)
				+{
				+	struct xfs_buf *leaf_bp;
				+	int rc;
				+
				+	trace_xfs_attr_leaf_list(context);
				+
				+	context->cursor->blkno = 0;
				+	rc = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &leaf_bp);
				+	if (!rc) {
				+		rc = xfs_attr3_leaf_list_int(leaf_bp, context);
				+		xfs_trans_brelse(NULL, leaf_bp);
				+	}
				+	return XFS_ERROR(rc);
				+}
			
 
				+
			
 
				+/*
				+ * List the attributes of context->dp through the callbacks in context.
				+ *
				+ * Returns EIO without taking any lock if the filesystem has been
				+ * forcibly shut down.  Otherwise the inode is locked shared and the
				+ * listing routine matching the attribute fork is called: shortform
				+ * for a local-format fork, leaf for a one-block fork, node for
				+ * anything else.  An inode without attributes yields 0.
				+ */
				+int
				+xfs_attr_list_int(
				+	xfs_attr_list_context_t *context)
				+{
				+	xfs_inode_t *ip = context->dp;
				+	int rc = 0;
				+
				+	XFS_STATS_INC(xs_attr_list);
				+
				+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				+		return EIO;
				+
				+	xfs_ilock(ip, XFS_ILOCK_SHARED);
				+
				+	/* Pick the work routine from the attribute fork's format. */
				+	if (xfs_inode_hasattr(ip)) {
				+		if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
				+			rc = xfs_attr_shortform_list(context);
				+		else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
				+			rc = xfs_attr_leaf_list(context);
				+		else
				+			rc = xfs_attr_node_list(context);
				+	}
				+
				+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				+
				+	return rc;
				+}
			
 
				+
			
 
				+#define	ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
			
 
				+	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
			
 
				+#define	ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
			
 
				+	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
			
 
				+	 & ~(sizeof(u_int32_t)-1))
			
 
				+
			
 
				+/*
			
 
				+ * Format an attribute and copy it out to the user's buffer.
			
 
				+ * Take care to check values and protect against them changing later,
			
 
				+ * we may be reading them directly out of a user buffer.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_attr_put_listent(
			
 
				+	xfs_attr_list_context_t *context,
			
 
				+	int		flags,
			
 
				+	unsigned char	*name,
			
 
				+	int		namelen,
			
 
				+	int		valuelen,
			
 
				+	unsigned char	*value)
			
 
				+{
			
 
				+	struct attrlist *alist = (struct attrlist *)context->alist;
			
 
				+	attrlist_ent_t *aep;
			
 
				+	int arraytop;
			
 
				+
			
 
				+	ASSERT(!(context->flags & ATTR_KERNOVAL));
			
 
				+	ASSERT(context->count >= 0);
			
 
				+	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
			
 
				+	ASSERT(context->firstu >= sizeof(*alist));
			
 
				+	ASSERT(context->firstu <= context->bufsize);
			
 
				+
			
 
				+	/*
			
 
				+	 * Only list entries in the right namespace.
			
 
				+	 */
			
 
				+	if (((context->flags & ATTR_SECURE) == 0) !=
			
 
				+	    ((flags & XFS_ATTR_SECURE) == 0))
			
 
				+		return 0;
			
 
				+	if (((context->flags & ATTR_ROOT) == 0) !=
			
 
				+	    ((flags & XFS_ATTR_ROOT) == 0))
			
 
				+		return 0;
			
 
				+
			
 
				+	arraytop = sizeof(*alist) +
			
 
				+			context->count * sizeof(alist->al_offset[0]);
			
 
				+	context->firstu -= ATTR_ENTSIZE(namelen);
			
 
				+	if (context->firstu < arraytop) {
			
 
				+		trace_xfs_attr_list_full(context);
			
 
				+		alist->al_more = 1;
			
 
				+		context->seen_enough = 1;
			
 
				+		return 1;
			
 
				+	}
			
 
				+
			
 
				+	aep = (attrlist_ent_t *)&context->alist[context->firstu];
			
 
				+	aep->a_valuelen = valuelen;
			
 
				+	memcpy(aep->a_name, name, namelen);
			
 
				+	aep->a_name[namelen] = 0;
			
 
				+	alist->al_offset[context->count++] = context->firstu;
			
 
				+	alist->al_count = context->count;
			
 
				+	trace_xfs_attr_list_add(context);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Generate a list of extended attribute names and optionally
			
 
				+ * also value lengths.  Positive return value follows the XFS
			
 
				+ * convention of being an error, zero or negative return code
			
 
				+ * is the length of the buffer returned (negated), indicating
			
 
				+ * success.
			
 
				+ */
			
 
				+int
			
 
				+xfs_attr_list(
			
 
				+	xfs_inode_t	*dp,
			
 
				+	char		*buffer,
			
 
				+	int		bufsize,
			
 
				+	int		flags,
			
 
				+	attrlist_cursor_kern_t *cursor)
			
 
				+{
			
 
				+	xfs_attr_list_context_t context;
			
 
				+	struct attrlist *alist;
			
 
				+	int error;
			
 
				+
			
 
				+	/*
			
 
				+	 * Validate the cursor.
			
 
				+	 */
			
 
				+	if (cursor->pad1 || cursor->pad2)
			
 
				+		return(XFS_ERROR(EINVAL));
			
 
				+	if ((cursor->initted == 0) &&
			
 
				+	    (cursor->hashval || cursor->blkno || cursor->offset))
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+
			
 
				+	/*
			
 
				+	 * Check for a properly aligned buffer.
			
 
				+	 */
			
 
				+	if (((long)buffer) & (sizeof(int)-1))
			
 
				+		return XFS_ERROR(EFAULT);
			
 
				+	if (flags & ATTR_KERNOVAL)
			
 
				+		bufsize = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Initialize the output buffer.
			
 
				+	 */
			
 
				+	memset(&context, 0, sizeof(context));
			
 
				+	context.dp = dp;
			
 
				+	context.cursor = cursor;
			
 
				+	context.resynch = 1;
			
 
				+	context.flags = flags;
			
 
				+	context.alist = buffer;
			
 
				+	context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
			
 
				+	context.firstu = context.bufsize;
			
 
				+	context.put_listent = xfs_attr_put_listent;
			
 
				+
			
 
				+	alist = (struct attrlist *)context.alist;
			
 
				+	alist->al_count = 0;
			
 
				+	alist->al_more = 0;
			
 
				+	alist->al_offset[0] = context.bufsize;
			
 
				+
			
 
				+	error = xfs_attr_list_int(&context);
			
 
				+	ASSERT(error >= 0);
			
 
				+	return error;
			
 
				+}
			
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -22,6 +22,7 @@
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
@@ -33,6 +34,7 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_attr_leaf.h"
			
 
				 #include "xfs_attr_remote.h"
			
@@ -237,7 +239,7 @@ xfs_attr_rmtval_copyout(
 
				 	xfs_ino_t	ino,
			
 
				 	int		*offset,
			
 
				 	int		*valuelen,
			
 
				-	char		**dst)
			
 
				+	__uint8_t	**dst)
			
 
				 {
			
 
				 	char		*src = bp->b_addr;
			
 
				 	xfs_daddr_t	bno = bp->b_bn;
			
@@ -249,7 +251,7 @@ xfs_attr_rmtval_copyout(
 
				 		int hdr_size = 0;
			
 
				 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
			
 
				 
			
 
				-		byte_cnt = min_t(int, *valuelen, byte_cnt);
			
 
				+		byte_cnt = min(*valuelen, byte_cnt);
			
 
				 
			
 
				 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				 			if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
			
@@ -284,7 +286,7 @@ xfs_attr_rmtval_copyin(
 
				 	xfs_ino_t	ino,
			
 
				 	int		*offset,
			
 
				 	int		*valuelen,
			
 
				-	char		**src)
			
 
				+	__uint8_t	**src)
			
 
				 {
			
 
				 	char		*dst = bp->b_addr;
			
 
				 	xfs_daddr_t	bno = bp->b_bn;
			
@@ -337,7 +339,7 @@ xfs_attr_rmtval_get(
 
				 	struct xfs_mount	*mp = args->dp->i_mount;
			
 
				 	struct xfs_buf		*bp;
			
 
				 	xfs_dablk_t		lblkno = args->rmtblkno;
			
 
				-	char			*dst = args->value;
			
 
				+	__uint8_t		*dst = args->value;
			
 
				 	int			valuelen = args->valuelen;
			
 
				 	int			nmap;
			
 
				 	int			error;
			
@@ -401,7 +403,7 @@ xfs_attr_rmtval_set(
 
				 	struct xfs_bmbt_irec	map;
			
 
				 	xfs_dablk_t		lblkno;
			
 
				 	xfs_fileoff_t		lfileoff = 0;
			
 
				-	char			*src = args->value;
			
 
				+	__uint8_t		*src = args->value;
			
 
				 	int			blkcnt;
			
 
				 	int			valuelen;
			
 
				 	int			nmap;
			
@@ -543,11 +545,6 @@ xfs_attr_rmtval_remove(
 
				 
			
 
				 	/*
			
 
				 	 * Roll through the "value", invalidating the attribute value's blocks.
			
 
				-	 * Note that args->rmtblkcnt is the minimum number of data blocks we'll
			
 
				-	 * see for a CRC enabled remote attribute. Each extent will have a
			
 
				-	 * header, and so we may have more blocks than we realise here.  If we
			
 
				-	 * fail to map the blocks correctly, we'll have problems with the buffer
			
 
				-	 * lookups.
			
 
				 	 */
			
 
				 	lblkno = args->rmtblkno;
			
 
				 	blkcnt = args->rmtblkcnt;
			
@@ -628,4 +625,3 @@ xfs_attr_rmtval_remove(
 
				 	}
			
 
				 	return(0);
			
 
				 }
			
 
				-
			
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -17,16 +17,17 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_inum.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
@@ -39,6 +40,7 @@
 
				 #include "xfs_extfree_item.h"
			
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_attr_leaf.h"
			
@@ -46,7 +48,6 @@
 
				 #include "xfs_trans_space.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				 #include "xfs_filestream.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_symlink.h"
			
 
				 
			
@@ -108,19 +109,6 @@ xfs_bmap_compute_maxlevels(
 
				 	mp->m_bm_maxlevels[whichfork] = level;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Convert the given file system block to a disk block.  We have to treat it
			
 
				- * differently based on whether the file is a real time file or not, because the
			
 
				- * bmap code does.
			
 
				- */
			
 
				-xfs_daddr_t
			
 
				-xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
			
 
				-{
			
 
				-	return (XFS_IS_REALTIME_INODE(ip) ? \
			
 
				-		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
			
 
				-		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
			
 
				-}
			
 
				-
			
 
				 STATIC int				/* error */
			
 
				 xfs_bmbt_lookup_eq(
			
 
				 	struct xfs_btree_cur	*cur,
			
@@ -262,173 +250,6 @@ xfs_bmap_forkoff_reset(
 
				 	}
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Extent tree block counting routines.
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * Count leaf blocks given a range of extent records.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_bmap_count_leaves(
			
 
				-	xfs_ifork_t		*ifp,
			
 
				-	xfs_extnum_t		idx,
			
 
				-	int			numrecs,
			
 
				-	int			*count)
			
 
				-{
			
 
				-	int		b;
			
 
				-
			
 
				-	for (b = 0; b < numrecs; b++) {
			
 
				-		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
			
 
				-		*count += xfs_bmbt_get_blockcount(frp);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Count leaf blocks given a range of extent records originally
			
 
				- * in btree format.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_bmap_disk_count_leaves(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	struct xfs_btree_block	*block,
			
 
				-	int			numrecs,
			
 
				-	int			*count)
			
 
				-{
			
 
				-	int		b;
			
 
				-	xfs_bmbt_rec_t	*frp;
			
 
				-
			
 
				-	for (b = 1; b <= numrecs; b++) {
			
 
				-		frp = XFS_BMBT_REC_ADDR(mp, block, b);
			
 
				-		*count += xfs_bmbt_disk_get_blockcount(frp);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Recursively walks each level of a btree
			
 
				- * to count total fsblocks is use.
			
 
				- */
			
 
				-STATIC int                                     /* error */
			
 
				-xfs_bmap_count_tree(
			
 
				-	xfs_mount_t     *mp,            /* file system mount point */
			
 
				-	xfs_trans_t     *tp,            /* transaction pointer */
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_fsblock_t   blockno,	/* file system block number */
			
 
				-	int             levelin,	/* level in btree */
			
 
				-	int		*count)		/* Count of blocks */
			
 
				-{
			
 
				-	int			error;
			
 
				-	xfs_buf_t		*bp, *nbp;
			
 
				-	int			level = levelin;
			
 
				-	__be64			*pp;
			
 
				-	xfs_fsblock_t           bno = blockno;
			
 
				-	xfs_fsblock_t		nextbno;
			
 
				-	struct xfs_btree_block	*block, *nextblock;
			
 
				-	int			numrecs;
			
 
				-
			
 
				-	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
			
 
				-						&xfs_bmbt_buf_ops);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-	*count += 1;
			
 
				-	block = XFS_BUF_TO_BLOCK(bp);
			
 
				-
			
 
				-	if (--level) {
			
 
				-		/* Not at node above leaves, count this level of nodes */
			
 
				-		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			
 
				-		while (nextbno != NULLFSBLOCK) {
			
 
				-			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
			
 
				-						XFS_BMAP_BTREE_REF,
			
 
				-						&xfs_bmbt_buf_ops);
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-			*count += 1;
			
 
				-			nextblock = XFS_BUF_TO_BLOCK(nbp);
			
 
				-			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			
 
				-			xfs_trans_brelse(tp, nbp);
			
 
				-		}
			
 
				-
			
 
				-		/* Dive to the next level */
			
 
				-		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
			
 
				-		bno = be64_to_cpu(*pp);
			
 
				-		if (unlikely((error =
			
 
				-		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
			
 
				-			xfs_trans_brelse(tp, bp);
			
 
				-			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
			
 
				-					 XFS_ERRLEVEL_LOW, mp);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				-		}
			
 
				-		xfs_trans_brelse(tp, bp);
			
 
				-	} else {
			
 
				-		/* count all level 1 nodes and their leaves */
			
 
				-		for (;;) {
			
 
				-			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			
 
				-			numrecs = be16_to_cpu(block->bb_numrecs);
			
 
				-			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			
 
				-			xfs_trans_brelse(tp, bp);
			
 
				-			if (nextbno == NULLFSBLOCK)
			
 
				-				break;
			
 
				-			bno = nextbno;
			
 
				-			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
			
 
				-						XFS_BMAP_BTREE_REF,
			
 
				-						&xfs_bmbt_buf_ops);
			
 
				-			if (error)
			
 
				-				return error;
			
 
				-			*count += 1;
			
 
				-			block = XFS_BUF_TO_BLOCK(bp);
			
 
				-		}
			
 
				-	}
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Count fsblocks of the given fork.
			
 
				- */
			
 
				-int						/* error */
			
 
				-xfs_bmap_count_blocks(
			
 
				-	xfs_trans_t		*tp,		/* transaction pointer */
			
 
				-	xfs_inode_t		*ip,		/* incore inode */
			
 
				-	int			whichfork,	/* data or attr fork */
			
 
				-	int			*count)		/* out: count of blocks */
			
 
				-{
			
 
				-	struct xfs_btree_block	*block;	/* current btree block */
			
 
				-	xfs_fsblock_t		bno;	/* block # of "block" */
			
 
				-	xfs_ifork_t		*ifp;	/* fork structure */
			
 
				-	int			level;	/* btree level, for checking */
			
 
				-	xfs_mount_t		*mp;	/* file system mount structure */
			
 
				-	__be64			*pp;	/* pointer to block address */
			
 
				-
			
 
				-	bno = NULLFSBLOCK;
			
 
				-	mp = ip->i_mount;
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
			
 
				-		xfs_bmap_count_leaves(ifp, 0,
			
 
				-			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			
 
				-			count);
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
			
 
				-	 */
			
 
				-	block = ifp->if_broot;
			
 
				-	level = be16_to_cpu(block->bb_level);
			
 
				-	ASSERT(level > 0);
			
 
				-	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
			
 
				-	bno = be64_to_cpu(*pp);
			
 
				-	ASSERT(bno != NULLDFSBNO);
			
 
				-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
			
 
				-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
			
 
				-
			
 
				-	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
			
 
				-		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
			
 
				-				 mp);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Debug/sanity checking code
			
 
				  */
			
@@ -724,8 +545,8 @@ xfs_bmap_trace_exlist(
 
				 
			
 
				 /*
			
 
				  * Validate that the bmbt_irecs being returned from bmapi are valid
			
 
				- * given the callers original parameters.  Specifically check the
			
 
				- * ranges of the returned irecs to ensure that they only extent beyond
			
 
				+ * given the caller's original parameters.  Specifically check the
			
 
				+ * ranges of the returned irecs to ensure that they only extend beyond
			
 
				  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
			
 
				  */
			
 
				 STATIC void
			
@@ -823,7 +644,7 @@ xfs_bmap_add_free(
 
				  * Remove the entry "free" from the free item list.  Prev points to the
			
 
				  * previous entry, unless "free" is the head of the list.
			
 
				  */
			
 
				-STATIC void
			
 
				+void
			
 
				 xfs_bmap_del_free(
			
 
				 	xfs_bmap_free_t		*flist,	/* free item list header */
			
 
				 	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
			
@@ -837,92 +658,6 @@ xfs_bmap_del_free(
 
				 	kmem_zone_free(xfs_bmap_free_item_zone, free);
			
 
				 }
			
 
				 
			
 
				-
			
 
				-/*
			
 
				- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
			
 
				- * caller.  Frees all the extents that need freeing, which must be done
			
 
				- * last due to locking considerations.  We never free any extents in
			
 
				- * the first transaction.
			
 
				- *
			
 
				- * Return 1 if the given transaction was committed and a new one
			
 
				- * started, and 0 otherwise in the committed parameter.
			
 
				- */
			
 
				-int						/* error */
			
 
				-xfs_bmap_finish(
			
 
				-	xfs_trans_t		**tp,		/* transaction pointer addr */
			
 
				-	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
			
 
				-	int			*committed)	/* xact committed or not */
			
 
				-{
			
 
				-	xfs_efd_log_item_t	*efd;		/* extent free data */
			
 
				-	xfs_efi_log_item_t	*efi;		/* extent free intention */
			
 
				-	int			error;		/* error return value */
			
 
				-	xfs_bmap_free_item_t	*free;		/* free extent item */
			
 
				-	unsigned int		logres;		/* new log reservation */
			
 
				-	unsigned int		logcount;	/* new log count */
			
 
				-	xfs_mount_t		*mp;		/* filesystem mount structure */
			
 
				-	xfs_bmap_free_item_t	*next;		/* next item on free list */
			
 
				-	xfs_trans_t		*ntp;		/* new transaction pointer */
			
 
				-
			
 
				-	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
			
 
				-	if (flist->xbf_count == 0) {
			
 
				-		*committed = 0;
			
 
				-		return 0;
			
 
				-	}
			
 
				-	ntp = *tp;
			
 
				-	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
			
 
				-	for (free = flist->xbf_first; free; free = free->xbfi_next)
			
 
				-		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			
 
				-			free->xbfi_blockcount);
			
 
				-	logres = ntp->t_log_res;
			
 
				-	logcount = ntp->t_log_count;
			
 
				-	ntp = xfs_trans_dup(*tp);
			
 
				-	error = xfs_trans_commit(*tp, 0);
			
 
				-	*tp = ntp;
			
 
				-	*committed = 1;
			
 
				-	/*
			
 
				-	 * We have a new transaction, so we should return committed=1,
			
 
				-	 * even though we're returning an error.
			
 
				-	 */
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-
			
 
				-	/*
			
 
				-	 * transaction commit worked ok so we can drop the extra ticket
			
 
				-	 * reference that we gained in xfs_trans_dup()
			
 
				-	 */
			
 
				-	xfs_log_ticket_put(ntp->t_ticket);
			
 
				-
			
 
				-	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
			
 
				-			logcount)))
			
 
				-		return error;
			
 
				-	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
			
 
				-	for (free = flist->xbf_first; free != NULL; free = next) {
			
 
				-		next = free->xbfi_next;
			
 
				-		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
			
 
				-				free->xbfi_blockcount))) {
			
 
				-			/*
			
 
				-			 * The bmap free list will be cleaned up at a
			
 
				-			 * higher level.  The EFI will be canceled when
			
 
				-			 * this transaction is aborted.
			
 
				-			 * Need to force shutdown here to make sure it
			
 
				-			 * happens, since this transaction may not be
			
 
				-			 * dirty yet.
			
 
				-			 */
			
 
				-			mp = ntp->t_mountp;
			
 
				-			if (!XFS_FORCED_SHUTDOWN(mp))
			
 
				-				xfs_force_shutdown(mp,
			
 
				-						   (error == EFSCORRUPTED) ?
			
 
				-						   SHUTDOWN_CORRUPT_INCORE :
			
 
				-						   SHUTDOWN_META_IO_ERROR);
			
 
				-			return error;
			
 
				-		}
			
 
				-		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			
 
				-			free->xbfi_blockcount);
			
 
				-		xfs_bmap_del_free(flist, NULL, free);
			
 
				-	}
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Free up any items left in the list.
			
 
				  */
			
@@ -1413,8 +1148,8 @@ xfs_bmap_add_attrfork(
 
				 	blks = XFS_ADDAFORK_SPACE_RES(mp);
			
 
				 	if (rsvd)
			
 
				 		tp->t_flags |= XFS_TRANS_RESERVE;
			
 
				-	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
			
 
				+	if (error)
			
 
				 		goto error0;
			
 
				 	xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			
@@ -1815,7 +1550,7 @@ xfs_bmap_first_unused(
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Returns the file-relative block number of the last block + 1 before
			
 
				+ * Returns the file-relative block number of the last block - 1 before
			
 
				  * last_block (input value) in the file.
			
 
				  * This is not based on i_size, it is based on the extent records.
			
 
				  * Returns 0 for local files, as they do not have extent records.
			
@@ -1863,7 +1598,7 @@ xfs_bmap_last_before(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-STATIC int
			
 
				+int
			
 
				 xfs_bmap_last_extent(
			
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_inode	*ip,
			
@@ -1926,29 +1661,6 @@ xfs_bmap_isaeof(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Check if the endoff is outside the last extent. If so the caller will grow
			
 
				- * the allocation to a stripe unit boundary.  All offsets are considered outside
			
 
				- * the end of file for an empty fork, so 1 is returned in *eof in that case.
			
 
				- */
			
 
				-int
			
 
				-xfs_bmap_eof(
			
 
				-	struct xfs_inode	*ip,
			
 
				-	xfs_fileoff_t		endoff,
			
 
				-	int			whichfork,
			
 
				-	int			*eof)
			
 
				-{
			
 
				-	struct xfs_bmbt_irec	rec;
			
 
				-	int			error;
			
 
				-
			
 
				-	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
			
 
				-	if (error || *eof)
			
 
				-		return error;
			
 
				-
			
 
				-	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Returns the file-relative block number of the first block past eof in
			
 
				  * the file.  This is not based on i_size, it is based on the extent records.
			
@@ -3488,7 +3200,7 @@ done:
 
				 /*
			
 
				  * Adjust the size of the new extent based on di_extsize and rt extsize.
			
 
				  */
			
 
				-STATIC int
			
 
				+int
			
 
				 xfs_bmap_extsize_align(
			
 
				 	xfs_mount_t	*mp,
			
 
				 	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
			
@@ -3650,9 +3362,9 @@ xfs_bmap_extsize_align(
 
				 
			
 
				 #define XFS_ALLOC_GAP_UNITS	4
			
 
				 
			
 
				-STATIC void
			
 
				+void
			
 
				 xfs_bmap_adjacent(
			
 
				-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
			
 
				+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
			
 
				 {
			
 
				 	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
			
 
				 	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
			
@@ -3798,109 +3510,6 @@ xfs_bmap_adjacent(
 
				 #undef ISVALID
			
 
				 }
			
 
				 
			
 
				-STATIC int
			
 
				-xfs_bmap_rtalloc(
			
 
				-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
			
 
				-{
			
 
				-	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
			
 
				-	int		error;		/* error return value */
			
 
				-	xfs_mount_t	*mp;		/* mount point structure */
			
 
				-	xfs_extlen_t	prod = 0;	/* product factor for allocators */
			
 
				-	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
			
 
				-	xfs_extlen_t	align;		/* minimum allocation alignment */
			
 
				-	xfs_rtblock_t	rtb;
			
 
				-
			
 
				-	mp = ap->ip->i_mount;
			
 
				-	align = xfs_get_extsz_hint(ap->ip);
			
 
				-	prod = align / mp->m_sb.sb_rextsize;
			
 
				-	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
			
 
				-					align, 1, ap->eof, 0,
			
 
				-					ap->conv, &ap->offset, &ap->length);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-	ASSERT(ap->length);
			
 
				-	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
			
 
				-
			
 
				-	/*
			
 
				-	 * If the offset & length are not perfectly aligned
			
 
				-	 * then kill prod, it will just get us in trouble.
			
 
				-	 */
			
 
				-	if (do_mod(ap->offset, align) || ap->length % align)
			
 
				-		prod = 1;
			
 
				-	/*
			
 
				-	 * Set ralen to be the actual requested length in rtextents.
			
 
				-	 */
			
 
				-	ralen = ap->length / mp->m_sb.sb_rextsize;
			
 
				-	/*
			
 
				-	 * If the old value was close enough to MAXEXTLEN that
			
 
				-	 * we rounded up to it, cut it back so it's valid again.
			
 
				-	 * Note that if it's a really large request (bigger than
			
 
				-	 * MAXEXTLEN), we don't hear about that number, and can't
			
 
				-	 * adjust the starting point to match it.
			
 
				-	 */
			
 
				-	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
			
 
				-		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
			
 
				-
			
 
				-	/*
			
 
				-	 * Lock out other modifications to the RT bitmap inode.
			
 
				-	 */
			
 
				-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
			
 
				-	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * If it's an allocation to an empty file at offset 0,
			
 
				-	 * pick an extent that will space things out in the rt area.
			
 
				-	 */
			
 
				-	if (ap->eof && ap->offset == 0) {
			
 
				-		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
			
 
				-
			
 
				-		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-		ap->blkno = rtx * mp->m_sb.sb_rextsize;
			
 
				-	} else {
			
 
				-		ap->blkno = 0;
			
 
				-	}
			
 
				-
			
 
				-	xfs_bmap_adjacent(ap);
			
 
				-
			
 
				-	/*
			
 
				-	 * Realtime allocation, done through xfs_rtallocate_extent.
			
 
				-	 */
			
 
				-	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
			
 
				-	do_div(ap->blkno, mp->m_sb.sb_rextsize);
			
 
				-	rtb = ap->blkno;
			
 
				-	ap->length = ralen;
			
 
				-	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
			
 
				-				&ralen, atype, ap->wasdel, prod, &rtb)))
			
 
				-		return error;
			
 
				-	if (rtb == NULLFSBLOCK && prod > 1 &&
			
 
				-	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
			
 
				-					   ap->length, &ralen, atype,
			
 
				-					   ap->wasdel, 1, &rtb)))
			
 
				-		return error;
			
 
				-	ap->blkno = rtb;
			
 
				-	if (ap->blkno != NULLFSBLOCK) {
			
 
				-		ap->blkno *= mp->m_sb.sb_rextsize;
			
 
				-		ralen *= mp->m_sb.sb_rextsize;
			
 
				-		ap->length = ralen;
			
 
				-		ap->ip->i_d.di_nblocks += ralen;
			
 
				-		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
			
 
				-		if (ap->wasdel)
			
 
				-			ap->ip->i_delayed_blks -= ralen;
			
 
				-		/*
			
 
				-		 * Adjust the disk quota also. This was reserved
			
 
				-		 * earlier.
			
 
				-		 */
			
 
				-		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			
 
				-			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
			
 
				-					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
			
 
				-	} else {
			
 
				-		ap->length = 0;
			
 
				-	}
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 STATIC int
			
 
				 xfs_bmap_btalloc_nullfb(
			
 
				 	struct xfs_bmalloca	*ap,
			
@@ -4018,7 +3627,7 @@ xfs_bmap_btalloc_nullfb(
 
				 
			
 
				 STATIC int
			
 
				 xfs_bmap_btalloc(
			
 
				-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
			
 
				+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
			
 
				 {
			
 
				 	xfs_mount_t	*mp;		/* mount point structure */
			
 
				 	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
			
@@ -4250,7 +3859,7 @@ xfs_bmap_btalloc(
 
				  */
			
 
				 STATIC int
			
 
				 xfs_bmap_alloc(
			
 
				-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
			
 
				+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
			
 
				 {
			
 
				 	if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
			
 
				 		return xfs_bmap_rtalloc(ap);
			
@@ -4638,7 +4247,7 @@ xfs_bmapi_delay(
 
				 }
			
 
				 
			
 
				 
			
 
				-STATIC int
			
 
				+int
			
 
				 __xfs_bmapi_allocate(
			
 
				 	struct xfs_bmalloca	*bma)
			
 
				 {
			
@@ -4648,12 +4257,9 @@ __xfs_bmapi_allocate(
 
				 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
			
 
				 	int			tmp_logflags = 0;
			
 
				 	int			error;
			
 
				-	int			rt;
			
 
				 
			
 
				 	ASSERT(bma->length > 0);
			
 
				 
			
 
				-	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
			
 
				-
			
 
				 	/*
			
 
				 	 * For the wasdelay case, we could also just allocate the stuff asked
			
 
				 	 * for in this bmap call but that wouldn't be as good.
			
@@ -4756,45 +4362,6 @@ __xfs_bmapi_allocate(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-xfs_bmapi_allocate_worker(
			
 
				-	struct work_struct	*work)
			
 
				-{
			
 
				-	struct xfs_bmalloca	*args = container_of(work,
			
 
				-						struct xfs_bmalloca, work);
			
 
				-	unsigned long		pflags;
			
 
				-
			
 
				-	/* we are in a transaction context here */
			
 
				-	current_set_flags_nested(&pflags, PF_FSTRANS);
			
 
				-
			
 
				-	args->result = __xfs_bmapi_allocate(args);
			
 
				-	complete(args->done);
			
 
				-
			
 
				-	current_restore_flags_nested(&pflags, PF_FSTRANS);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Some allocation requests often come in with little stack to work on. Push
			
 
				- * them off to a worker thread so there is lots of stack to use. Otherwise just
			
 
				- * call directly to avoid the context switch overhead here.
			
 
				- */
			
 
				-int
			
 
				-xfs_bmapi_allocate(
			
 
				-	struct xfs_bmalloca	*args)
			
 
				-{
			
 
				-	DECLARE_COMPLETION_ONSTACK(done);
			
 
				-
			
 
				-	if (!args->stack_switch)
			
 
				-		return __xfs_bmapi_allocate(args);
			
 
				-
			
 
				-
			
 
				-	args->done = &done;
			
 
				-	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
			
 
				-	queue_work(xfs_alloc_wq, &args->work);
			
 
				-	wait_for_completion(&done);
			
 
				-	return args->result;
			
 
				-}
			
 
				-
			
 
				 STATIC int
			
 
				 xfs_bmapi_convert_unwritten(
			
 
				 	struct xfs_bmalloca	*bma,
			
@@ -5789,359 +5356,3 @@ error0:
 
				 	}
			
 
				 	return error;
			
 
				 }
			
 
				-
			
 
				-/*
			
 
				- * returns 1 for success, 0 if we failed to map the extent.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_getbmapx_fix_eof_hole(
			
 
				-	xfs_inode_t		*ip,		/* xfs incore inode pointer */
			
 
				-	struct getbmapx		*out,		/* output structure */
			
 
				-	int			prealloced,	/* this is a file with
			
 
				-						 * preallocated data space */
			
 
				-	__int64_t		end,		/* last block requested */
			
 
				-	xfs_fsblock_t		startblock)
			
 
				-{
			
 
				-	__int64_t		fixlen;
			
 
				-	xfs_mount_t		*mp;		/* file system mount point */
			
 
				-	xfs_ifork_t		*ifp;		/* inode fork pointer */
			
 
				-	xfs_extnum_t		lastx;		/* last extent pointer */
			
 
				-	xfs_fileoff_t		fileblock;
			
 
				-
			
 
				-	if (startblock == HOLESTARTBLOCK) {
			
 
				-		mp = ip->i_mount;
			
 
				-		out->bmv_block = -1;
			
 
				-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
			
 
				-		fixlen -= out->bmv_offset;
			
 
				-		if (prealloced && out->bmv_offset + out->bmv_length == end) {
			
 
				-			/* Came to hole at EOF. Trim it. */
			
 
				-			if (fixlen <= 0)
			
 
				-				return 0;
			
 
				-			out->bmv_length = fixlen;
			
 
				-		}
			
 
				-	} else {
			
 
				-		if (startblock == DELAYSTARTBLOCK)
			
 
				-			out->bmv_block = -2;
			
 
				-		else
			
 
				-			out->bmv_block = xfs_fsb_to_db(ip, startblock);
			
 
				-		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
			
 
				-		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
			
 
				-		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
			
 
				-		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
			
 
				-			out->bmv_oflags |= BMV_OF_LAST;
			
 
				-	}
			
 
				-
			
 
				-	return 1;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Get inode's extents as described in bmv, and format for output.
			
 
				- * Calls formatter to fill the user's buffer until all extents
			
 
				- * are mapped, until the passed-in bmv->bmv_count slots have
			
 
				- * been filled, or until the formatter short-circuits the loop,
			
 
				- * if it is tracking filled-in extents on its own.
			
 
				- */
			
 
				-int						/* error code */
			
 
				-xfs_getbmap(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	struct getbmapx		*bmv,		/* user bmap structure */
			
 
				-	xfs_bmap_format_t	formatter,	/* format to user */
			
 
				-	void			*arg)		/* formatter arg */
			
 
				-{
			
 
				-	__int64_t		bmvend;		/* last block requested */
			
 
				-	int			error = 0;	/* return value */
			
 
				-	__int64_t		fixlen;		/* length for -1 case */
			
 
				-	int			i;		/* extent number */
			
 
				-	int			lock;		/* lock state */
			
 
				-	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
			
 
				-	xfs_mount_t		*mp;		/* file system mount point */
			
 
				-	int			nex;		/* # of user extents can do */
			
 
				-	int			nexleft;	/* # of user extents left */
			
 
				-	int			subnex;		/* # of bmapi's can do */
			
 
				-	int			nmap;		/* number of map entries */
			
 
				-	struct getbmapx		*out;		/* output structure */
			
 
				-	int			whichfork;	/* data or attr fork */
			
 
				-	int			prealloced;	/* this is a file with
			
 
				-						 * preallocated data space */
			
 
				-	int			iflags;		/* interface flags */
			
 
				-	int			bmapi_flags;	/* flags for xfs_bmapi */
			
 
				-	int			cur_ext = 0;
			
 
				-
			
 
				-	mp = ip->i_mount;
			
 
				-	iflags = bmv->bmv_iflags;
			
 
				-	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
			
 
				-
			
 
				-	if (whichfork == XFS_ATTR_FORK) {
			
 
				-		if (XFS_IFORK_Q(ip)) {
			
 
				-			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
			
 
				-			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
			
 
				-			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
			
 
				-				return XFS_ERROR(EINVAL);
			
 
				-		} else if (unlikely(
			
 
				-			   ip->i_d.di_aformat != 0 &&
			
 
				-			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
			
 
				-			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
			
 
				-					 ip->i_mount);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				-		}
			
 
				-
			
 
				-		prealloced = 0;
			
 
				-		fixlen = 1LL << 32;
			
 
				-	} else {
			
 
				-		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
			
 
				-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
			
 
				-		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
			
 
				-			return XFS_ERROR(EINVAL);
			
 
				-
			
 
				-		if (xfs_get_extsz_hint(ip) ||
			
 
				-		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
			
 
				-			prealloced = 1;
			
 
				-			fixlen = mp->m_super->s_maxbytes;
			
 
				-		} else {
			
 
				-			prealloced = 0;
			
 
				-			fixlen = XFS_ISIZE(ip);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	if (bmv->bmv_length == -1) {
			
 
				-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
			
 
				-		bmv->bmv_length =
			
 
				-			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
			
 
				-	} else if (bmv->bmv_length == 0) {
			
 
				-		bmv->bmv_entries = 0;
			
 
				-		return 0;
			
 
				-	} else if (bmv->bmv_length < 0) {
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-	}
			
 
				-
			
 
				-	nex = bmv->bmv_count - 1;
			
 
				-	if (nex <= 0)
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-	bmvend = bmv->bmv_offset + bmv->bmv_length;
			
 
				-
			
 
				-
			
 
				-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
			
 
				-		return XFS_ERROR(ENOMEM);
			
 
				-	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
			
 
				-	if (!out) {
			
 
				-		out = kmem_zalloc_large(bmv->bmv_count *
			
 
				-					sizeof(struct getbmapx));
			
 
				-		if (!out)
			
 
				-			return XFS_ERROR(ENOMEM);
			
 
				-	}
			
 
				-
			
 
				-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
			
 
				-	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
			
 
				-		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
			
 
				-			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
			
 
				-			if (error)
			
 
				-				goto out_unlock_iolock;
			
 
				-		}
			
 
				-		/*
			
 
				-		 * even after flushing the inode, there can still be delalloc
			
 
				-		 * blocks on the inode beyond EOF due to speculative
			
 
				-		 * preallocation. These are not removed until the release
			
 
				-		 * function is called or the inode is inactivated. Hence we
			
 
				-		 * cannot assert here that ip->i_delayed_blks == 0.
			
 
				-		 */
			
 
				-	}
			
 
				-
			
 
				-	lock = xfs_ilock_map_shared(ip);
			
 
				-
			
 
				-	/*
			
 
				-	 * Don't let nex be bigger than the number of extents
			
 
				-	 * we can have assuming alternating holes and real extents.
			
 
				-	 */
			
 
				-	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
			
 
				-		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
			
 
				-
			
 
				-	bmapi_flags = xfs_bmapi_aflag(whichfork);
			
 
				-	if (!(iflags & BMV_IF_PREALLOC))
			
 
				-		bmapi_flags |= XFS_BMAPI_IGSTATE;
			
 
				-
			
 
				-	/*
			
 
				-	 * Allocate enough space to handle "subnex" maps at a time.
			
 
				-	 */
			
 
				-	error = ENOMEM;
			
 
				-	subnex = 16;
			
 
				-	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
			
 
				-	if (!map)
			
 
				-		goto out_unlock_ilock;
			
 
				-
			
 
				-	bmv->bmv_entries = 0;
			
 
				-
			
 
				-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
			
 
				-	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
			
 
				-		error = 0;
			
 
				-		goto out_free_map;
			
 
				-	}
			
 
				-
			
 
				-	nexleft = nex;
			
 
				-
			
 
				-	do {
			
 
				-		nmap = (nexleft > subnex) ? subnex : nexleft;
			
 
				-		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
			
 
				-				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
			
 
				-				       map, &nmap, bmapi_flags);
			
 
				-		if (error)
			
 
				-			goto out_free_map;
			
 
				-		ASSERT(nmap <= subnex);
			
 
				-
			
 
				-		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
			
 
				-			out[cur_ext].bmv_oflags = 0;
			
 
				-			if (map[i].br_state == XFS_EXT_UNWRITTEN)
			
 
				-				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
			
 
				-			else if (map[i].br_startblock == DELAYSTARTBLOCK)
			
 
				-				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
			
 
				-			out[cur_ext].bmv_offset =
			
 
				-				XFS_FSB_TO_BB(mp, map[i].br_startoff);
			
 
				-			out[cur_ext].bmv_length =
			
 
				-				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			
 
				-			out[cur_ext].bmv_unused1 = 0;
			
 
				-			out[cur_ext].bmv_unused2 = 0;
			
 
				-
			
 
				-			/*
			
 
				-			 * delayed allocation extents that start beyond EOF can
			
 
				-			 * occur due to speculative EOF allocation when the
			
 
				-			 * delalloc extent is larger than the largest freespace
			
 
				-			 * extent at conversion time. These extents cannot be
			
 
				-			 * converted by data writeback, so can exist here even
			
 
				-			 * if we are not supposed to be finding delalloc
			
 
				-			 * extents.
			
 
				-			 */
			
 
				-			if (map[i].br_startblock == DELAYSTARTBLOCK &&
			
 
				-			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
			
 
				-				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
			
 
				-
			
 
				-                        if (map[i].br_startblock == HOLESTARTBLOCK &&
			
 
				-			    whichfork == XFS_ATTR_FORK) {
			
 
				-				/* came to the end of attribute fork */
			
 
				-				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
			
 
				-				goto out_free_map;
			
 
				-			}
			
 
				-
			
 
				-			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
			
 
				-					prealloced, bmvend,
			
 
				-					map[i].br_startblock))
			
 
				-				goto out_free_map;
			
 
				-
			
 
				-			bmv->bmv_offset =
			
 
				-				out[cur_ext].bmv_offset +
			
 
				-				out[cur_ext].bmv_length;
			
 
				-			bmv->bmv_length =
			
 
				-				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
			
 
				-
			
 
				-			/*
			
 
				-			 * In case we don't want to return the hole,
			
 
				-			 * don't increase cur_ext so that we can reuse
			
 
				-			 * it in the next loop.
			
 
				-			 */
			
 
				-			if ((iflags & BMV_IF_NO_HOLES) &&
			
 
				-			    map[i].br_startblock == HOLESTARTBLOCK) {
			
 
				-				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
			
 
				-				continue;
			
 
				-			}
			
 
				-
			
 
				-			nexleft--;
			
 
				-			bmv->bmv_entries++;
			
 
				-			cur_ext++;
			
 
				-		}
			
 
				-	} while (nmap && nexleft && bmv->bmv_length);
			
 
				-
			
 
				- out_free_map:
			
 
				-	kmem_free(map);
			
 
				- out_unlock_ilock:
			
 
				-	xfs_iunlock_map_shared(ip, lock);
			
 
				- out_unlock_iolock:
			
 
				-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			
 
				-
			
 
				-	for (i = 0; i < cur_ext; i++) {
			
 
				-		int full = 0;	/* user array is full */
			
 
				-
			
 
				-		/* format results & advance arg */
			
 
				-		error = formatter(&arg, &out[i], &full);
			
 
				-		if (error || full)
			
 
				-			break;
			
 
				-	}
			
 
				-
			
 
				-	if (is_vmalloc_addr(out))
			
 
				-		kmem_free_large(out);
			
 
				-	else
			
 
				-		kmem_free(out);
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * dead simple method of punching delalyed allocation blocks from a range in
			
 
				- * the inode. Walks a block at a time so will be slow, but is only executed in
			
 
				- * rare error cases so the overhead is not critical. This will alays punch out
			
 
				- * both the start and end blocks, even if the ranges only partially overlap
			
 
				- * them, so it is up to the caller to ensure that partial blocks are not
			
 
				- * passed in.
			
 
				- */
			
 
				-int
			
 
				-xfs_bmap_punch_delalloc_range(
			
 
				-	struct xfs_inode	*ip,
			
 
				-	xfs_fileoff_t		start_fsb,
			
 
				-	xfs_fileoff_t		length)
			
 
				-{
			
 
				-	xfs_fileoff_t		remaining = length;
			
 
				-	int			error = 0;
			
 
				-
			
 
				-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
			
 
				-
			
 
				-	do {
			
 
				-		int		done;
			
 
				-		xfs_bmbt_irec_t	imap;
			
 
				-		int		nimaps = 1;
			
 
				-		xfs_fsblock_t	firstblock;
			
 
				-		xfs_bmap_free_t flist;
			
 
				-
			
 
				-		/*
			
 
				-		 * Map the range first and check that it is a delalloc extent
			
 
				-		 * before trying to unmap the range. Otherwise we will be
			
 
				-		 * trying to remove a real extent (which requires a
			
 
				-		 * transaction) or a hole, which is probably a bad idea...
			
 
				-		 */
			
 
				-		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
			
 
				-				       XFS_BMAPI_ENTIRE);
			
 
				-
			
 
				-		if (error) {
			
 
				-			/* something screwed, just bail */
			
 
				-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
			
 
				-				xfs_alert(ip->i_mount,
			
 
				-			"Failed delalloc mapping lookup ino %lld fsb %lld.",
			
 
				-						ip->i_ino, start_fsb);
			
 
				-			}
			
 
				-			break;
			
 
				-		}
			
 
				-		if (!nimaps) {
			
 
				-			/* nothing there */
			
 
				-			goto next_block;
			
 
				-		}
			
 
				-		if (imap.br_startblock != DELAYSTARTBLOCK) {
			
 
				-			/* been converted, ignore */
			
 
				-			goto next_block;
			
 
				-		}
			
 
				-		WARN_ON(imap.br_blockcount == 0);
			
 
				-
			
 
				-		/*
			
 
				-		 * Note: while we initialise the firstblock/flist pair, they
			
 
				-		 * should never be used because blocks should never be
			
 
				-		 * allocated or freed for a delalloc extent and hence we need
			
 
				-		 * don't cancel or finish them after the xfs_bunmapi() call.
			
 
				-		 */
			
 
				-		xfs_bmap_init(&flist, &firstblock);
			
 
				-		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
			
 
				-					&flist, &done);
			
 
				-		if (error)
			
 
				-			break;
			
 
				-
			
 
				-		ASSERT(!flist.xbf_count && !flist.xbf_first);
			
 
				-next_block:
			
 
				-		start_fsb++;
			
 
				-		remaining--;
			
 
				-	} while(remaining > 0);
			
 
				-
			
 
				-	return error;
			
 
				-}
			
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -107,41 +107,6 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
 
				 		(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Argument structure for xfs_bmap_alloc.
			
 
				- */
			
 
				-typedef struct xfs_bmalloca {
			
 
				-	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
			
 
				-	struct xfs_bmap_free	*flist;	/* bmap freelist */
			
 
				-	struct xfs_trans	*tp;	/* transaction pointer */
			
 
				-	struct xfs_inode	*ip;	/* incore inode pointer */
			
 
				-	struct xfs_bmbt_irec	prev;	/* extent before the new one */
			
 
				-	struct xfs_bmbt_irec	got;	/* extent after, or delayed */
			
 
				-
			
 
				-	xfs_fileoff_t		offset;	/* offset in file filling in */
			
 
				-	xfs_extlen_t		length;	/* i/o length asked/allocated */
			
 
				-	xfs_fsblock_t		blkno;	/* starting block of new extent */
			
 
				-
			
 
				-	struct xfs_btree_cur	*cur;	/* btree cursor */
			
 
				-	xfs_extnum_t		idx;	/* current extent index */
			
 
				-	int			nallocs;/* number of extents alloc'd */
			
 
				-	int			logflags;/* flags for transaction logging */
			
 
				-
			
 
				-	xfs_extlen_t		total;	/* total blocks needed for xaction */
			
 
				-	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
			
 
				-	xfs_extlen_t		minleft; /* amount must be left after alloc */
			
 
				-	char			eof;	/* set if allocating past last extent */
			
 
				-	char			wasdel;	/* replacing a delayed allocation */
			
 
				-	char			userdata;/* set if is user data */
			
 
				-	char			aeof;	/* allocated space at eof */
			
 
				-	char			conv;	/* overwriting unwritten extents */
			
 
				-	char			stack_switch;
			
 
				-	int			flags;
			
 
				-	struct completion	*done;
			
 
				-	struct work_struct	work;
			
 
				-	int			result;
			
 
				-} xfs_bmalloca_t;
			
 
				-
			
 
				 /*
			
 
				  * Flags for xfs_bmap_add_extent*.
			
 
				  */
			
@@ -162,7 +127,7 @@ typedef struct xfs_bmalloca {
 
				 	{ BMAP_RIGHT_FILLING,	"RF" }, \
			
 
				 	{ BMAP_ATTRFORK,	"ATTR" }
			
 
				 
			
 
				-#if defined(__KERNEL) && defined(DEBUG)
			
 
				+#ifdef DEBUG
			
 
				 void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
			
 
				 		int whichfork, unsigned long caller_ip);
			
 
				 #define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
			
@@ -205,23 +170,4 @@ int	xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
 
				 		xfs_extnum_t num);
			
 
				 uint	xfs_default_attroffset(struct xfs_inode *ip);
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				-/* bmap to userspace formatter - copy to user & advance pointer */
			
 
				-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
			
 
				-
			
 
				-int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
			
 
				-		int *committed);
			
 
				-int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
			
 
				-		xfs_bmap_format_t formatter, void *arg);
			
 
				-int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
			
 
				-		int whichfork, int *eof);
			
 
				-int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
			
 
				-		int whichfork, int *count);
			
 
				-int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
			
 
				-		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
			
 
				-
			
 
				-xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
			
 
				-
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				 #endif	/* __XFS_BMAP_H__ */
			
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -17,7 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
@@ -722,7 +722,7 @@ xfs_bmbt_key_diff(
 
				 				      cur->bc_rec.b.br_startoff;
			
 
				 }
			
 
				 
			
 
				-static int
			
 
				+static bool
			
 
				 xfs_bmbt_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
@@ -775,7 +775,6 @@ xfs_bmbt_verify(
 
				 		return false;
			
 
				 
			
 
				 	return true;
			
 
				-
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -789,7 +788,6 @@ xfs_bmbt_read_verify(
 
				 				     bp->b_target->bt_mount, bp->b_addr);
			
 
				 		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				 	}
			
 
				-
			
 
				 }
			
 
				 
			
 
				 static void
			
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -0,0 +1,2026 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2012 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_inum.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_alloc_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_btree.h"
			
 
				+#include "xfs_extfree_item.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				+#include "xfs_rtalloc.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_quota.h"
			
 
				+#include "xfs_trans_space.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_icache.h"
			
 
				+
			
 
				+/* Kernel only BMAP related definitions and functions */
			
 
				+
			
 
				+/*
			
 
				+ * Convert the given file system block to a disk block.  We have to treat it
			
 
				+ * differently based on whether the file is a real time file or not, because the
			
 
				+ * bmap code does.
			
 
				+ */
			
 
				+xfs_daddr_t
			
 
				+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
			
 
				+{
			
 
				+	return (XFS_IS_REALTIME_INODE(ip) ? \
			
 
				+		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
			
 
				+		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
			
 
				+ * caller.  Frees all the extents that need freeing, which must be done
			
 
				+ * last due to locking considerations.  We never free any extents in
			
 
				+ * the first transaction.
			
 
				+ *
			
 
				+ * Return 1 if the given transaction was committed and a new one
			
 
				+ * started, and 0 otherwise in the committed parameter.
			
 
				+ */
			
 
				+int						/* error */
			
 
				+xfs_bmap_finish(
			
 
				+	xfs_trans_t		**tp,		/* transaction pointer addr */
			
 
				+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
			
 
				+	int			*committed)	/* xact committed or not */
			
 
				+{
			
 
				+	xfs_efd_log_item_t	*efd;		/* extent free data */
			
 
				+	xfs_efi_log_item_t	*efi;		/* extent free intention */
			
 
				+	int			error;		/* error return value */
			
 
				+	xfs_bmap_free_item_t	*free;		/* free extent item */
			
 
				+	struct xfs_trans_res	tres;		/* new log reservation */
			
 
				+	xfs_mount_t		*mp;		/* filesystem mount structure */
			
 
				+	xfs_bmap_free_item_t	*next;		/* next item on free list */
			
 
				+	xfs_trans_t		*ntp;		/* new transaction pointer */
			
 
				+
			
 
				+	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
			
 
				+	if (flist->xbf_count == 0) {
			
 
				+		*committed = 0;
			
 
				+		return 0;
			
 
				+	}
			
 
				+	ntp = *tp;
			
 
				+	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
			
 
				+	for (free = flist->xbf_first; free; free = free->xbfi_next)
			
 
				+		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			
 
				+			free->xbfi_blockcount);
			
 
				+
			
 
				+	tres.tr_logres = ntp->t_log_res;
			
 
				+	tres.tr_logcount = ntp->t_log_count;
			
 
				+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
			
 
				+	ntp = xfs_trans_dup(*tp);
			
 
				+	error = xfs_trans_commit(*tp, 0);
			
 
				+	*tp = ntp;
			
 
				+	*committed = 1;
			
 
				+	/*
			
 
				+	 * We have a new transaction, so we should return committed=1,
			
 
				+	 * even though we're returning an error.
			
 
				+	 */
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	/*
			
 
				+	 * transaction commit worked ok so we can drop the extra ticket
			
 
				+	 * reference that we gained in xfs_trans_dup()
			
 
				+	 */
			
 
				+	xfs_log_ticket_put(ntp->t_ticket);
			
 
				+
			
 
				+	error = xfs_trans_reserve(ntp, &tres, 0, 0);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
			
 
				+	for (free = flist->xbf_first; free != NULL; free = next) {
			
 
				+		next = free->xbfi_next;
			
 
				+		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
			
 
				+				free->xbfi_blockcount))) {
			
 
				+			/*
			
 
				+			 * The bmap free list will be cleaned up at a
			
 
				+			 * higher level.  The EFI will be canceled when
			
 
				+			 * this transaction is aborted.
			
 
				+			 * Need to force shutdown here to make sure it
			
 
				+			 * happens, since this transaction may not be
			
 
				+			 * dirty yet.
			
 
				+			 */
			
 
				+			mp = ntp->t_mountp;
			
 
				+			if (!XFS_FORCED_SHUTDOWN(mp))
			
 
				+				xfs_force_shutdown(mp,
			
 
				+						   (error == EFSCORRUPTED) ?
			
 
				+						   SHUTDOWN_CORRUPT_INCORE :
			
 
				+						   SHUTDOWN_META_IO_ERROR);
			
 
				+			return error;
			
 
				+		}
			
 
				+		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			
 
				+			free->xbfi_blockcount);
			
 
				+		xfs_bmap_del_free(flist, NULL, free);
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_bmap_rtalloc(
			
 
				+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
			
 
				+{
			
 
				+	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
			
 
				+	int		error;		/* error return value */
			
 
				+	xfs_mount_t	*mp;		/* mount point structure */
			
 
				+	xfs_extlen_t	prod = 0;	/* product factor for allocators */
			
 
				+	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
			
 
				+	xfs_extlen_t	align;		/* minimum allocation alignment */
			
 
				+	xfs_rtblock_t	rtb;
			
 
				+
			
 
				+	mp = ap->ip->i_mount;
			
 
				+	align = xfs_get_extsz_hint(ap->ip);
			
 
				+	prod = align / mp->m_sb.sb_rextsize;
			
 
				+	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
			
 
				+					align, 1, ap->eof, 0,
			
 
				+					ap->conv, &ap->offset, &ap->length);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+	ASSERT(ap->length);
			
 
				+	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * If the offset & length are not perfectly aligned
			
 
				+	 * then kill prod, it will just get us in trouble.
			
 
				+	 */
			
 
				+	if (do_mod(ap->offset, align) || ap->length % align)
			
 
				+		prod = 1;
			
 
				+	/*
			
 
				+	 * Set ralen to be the actual requested length in rtextents.
			
 
				+	 */
			
 
				+	ralen = ap->length / mp->m_sb.sb_rextsize;
			
 
				+	/*
			
 
				+	 * If the old value was close enough to MAXEXTLEN that
			
 
				+	 * we rounded up to it, cut it back so it's valid again.
			
 
				+	 * Note that if it's a really large request (bigger than
			
 
				+	 * MAXEXTLEN), we don't hear about that number, and can't
			
 
				+	 * adjust the starting point to match it.
			
 
				+	 */
			
 
				+	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
			
 
				+		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
			
 
				+
			
 
				+	/*
			
 
				+	 * Lock out other modifications to the RT bitmap inode.
			
 
				+	 */
			
 
				+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * If it's an allocation to an empty file at offset 0,
			
 
				+	 * pick an extent that will space things out in the rt area.
			
 
				+	 */
			
 
				+	if (ap->eof && ap->offset == 0) {
			
 
				+		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
			
 
				+
			
 
				+		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+		ap->blkno = rtx * mp->m_sb.sb_rextsize;
			
 
				+	} else {
			
 
				+		ap->blkno = 0;
			
 
				+	}
			
 
				+
			
 
				+	xfs_bmap_adjacent(ap);
			
 
				+
			
 
				+	/*
			
 
				+	 * Realtime allocation, done through xfs_rtallocate_extent.
			
 
				+	 */
			
 
				+	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
			
 
				+	do_div(ap->blkno, mp->m_sb.sb_rextsize);
			
 
				+	rtb = ap->blkno;
			
 
				+	ap->length = ralen;
			
 
				+	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
			
 
				+				&ralen, atype, ap->wasdel, prod, &rtb)))
			
 
				+		return error;
			
 
				+	if (rtb == NULLFSBLOCK && prod > 1 &&
			
 
				+	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
			
 
				+					   ap->length, &ralen, atype,
			
 
				+					   ap->wasdel, 1, &rtb)))
			
 
				+		return error;
			
 
				+	ap->blkno = rtb;
			
 
				+	if (ap->blkno != NULLFSBLOCK) {
			
 
				+		ap->blkno *= mp->m_sb.sb_rextsize;
			
 
				+		ralen *= mp->m_sb.sb_rextsize;
			
 
				+		ap->length = ralen;
			
 
				+		ap->ip->i_d.di_nblocks += ralen;
			
 
				+		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
			
 
				+		if (ap->wasdel)
			
 
				+			ap->ip->i_delayed_blks -= ralen;
			
 
				+		/*
			
 
				+		 * Adjust the disk quota also. This was reserved
			
 
				+		 * earlier.
			
 
				+		 */
			
 
				+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			
 
				+			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
			
 
				+					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
			
 
				+	} else {
			
 
				+		ap->length = 0;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Stack switching interfaces for allocation
			
 
				+ */
			
 
				+static void
			
 
				+xfs_bmapi_allocate_worker(
			
 
				+	struct work_struct	*work)
			
 
				+{
			
 
				+	struct xfs_bmalloca	*args = container_of(work,
			
 
				+						struct xfs_bmalloca, work);
			
 
				+	unsigned long		pflags;
			
 
				+
			
 
				+	/* we are in a transaction context here */
			
 
				+	current_set_flags_nested(&pflags, PF_FSTRANS);
			
 
				+
			
 
				+	args->result = __xfs_bmapi_allocate(args);
			
 
				+	complete(args->done);
			
 
				+
			
 
				+	current_restore_flags_nested(&pflags, PF_FSTRANS);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Some allocation requests often come in with little stack to work on. Push
			
 
				+ * them off to a worker thread so there is lots of stack to use. Otherwise just
			
 
				+ * call directly to avoid the context switch overhead here.
			
 
				+ */
			
 
				+int
			
 
				+xfs_bmapi_allocate(
			
 
				+	struct xfs_bmalloca	*args)
			
 
				+{
			
 
				+	DECLARE_COMPLETION_ONSTACK(done);
			
 
				+
			
 
				+	if (!args->stack_switch)
			
 
				+		return __xfs_bmapi_allocate(args);
			
 
				+
			
 
				+
			
 
				+	args->done = &done;
			
 
				+	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
			
 
				+	queue_work(xfs_alloc_wq, &args->work);
			
 
				+	wait_for_completion(&done);
			
 
				+	return args->result;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Check if the endoff is outside the last extent. If so the caller will grow
			
 
				+ * the allocation to a stripe unit boundary.  All offsets are considered outside
			
 
				+ * the end of file for an empty fork, so 1 is returned in *eof in that case.
			
 
				+ */
			
 
				+int
			
 
				+xfs_bmap_eof(
			
 
				+	struct xfs_inode	*ip,
			
 
				+	xfs_fileoff_t		endoff,
			
 
				+	int			whichfork,
			
 
				+	int			*eof)
			
 
				+{
			
 
				+	struct xfs_bmbt_irec	rec;
			
 
				+	int			error;
			
 
				+
			
 
				+	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
			
 
				+	if (error || *eof)
			
 
				+		return error;
			
 
				+
			
 
				+	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Extent tree block counting routines.
			
 
				+ */
			
 
				+
			
 
				+/*
				+ * Count leaf blocks given a range of extent records.
				+ *
				+ * Adds the block count of each of the numrecs records fetched with
				+ * xfs_iext_get_ext(), starting at index idx of ifp, to *count.  *count is
				+ * accumulated into and never reset here, so the caller supplies the
				+ * starting value.  Nothing is done when numrecs is zero or negative.
				+ */
				+STATIC void
				+xfs_bmap_count_leaves(
				+	xfs_ifork_t		*ifp,
				+	xfs_extnum_t		idx,
				+	int			numrecs,
				+	int			*count)
				+{
				+	int		b;	/* offset from idx of the record being added */
				+
				+	for (b = 0; b < numrecs; b++) {
				+		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
				+		*count += xfs_bmbt_get_blockcount(frp);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Count leaf blocks given a range of extent records originally
				+ * in btree format.
				+ *
				+ * Adds the block count of records 1 through numrecs of the given btree
				+ * block to *count.  Records are located with XFS_BMBT_REC_ADDR() and read
				+ * with xfs_bmbt_disk_get_blockcount().  *count is accumulated into and
				+ * never reset here.
				+ */
				+STATIC void
				+xfs_bmap_disk_count_leaves(
				+	struct xfs_mount	*mp,
				+	struct xfs_btree_block	*block,
				+	int			numrecs,
				+	int			*count)
				+{
				+	int		b;	/* record index passed to XFS_BMBT_REC_ADDR(), from 1 */
				+	xfs_bmbt_rec_t	*frp;
				+
				+	for (b = 1; b <= numrecs; b++) {
				+		frp = XFS_BMBT_REC_ADDR(mp, block, b);
				+		*count += xfs_bmbt_disk_get_blockcount(frp);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Recursively walks each level of a btree
				+ * to count total fsblocks in use.
				+ *
				+ * The block at blockno is read and counted.  levelin is decremented; while
				+ * the result is non-zero every right sibling of that block is read and
				+ * counted, then the walk recurses into the block named by the first
				+ * pointer.  When the decremented level reaches zero, the function instead
				+ * walks the sibling chain adding one per block plus the block counts of
				+ * every record found via xfs_bmap_disk_count_leaves().
				+ *
				+ * *count is accumulated into and never reset here.  ifp is only forwarded
				+ * to the recursive call.
				+ *
				+ * NOTE(review): the recursive call is treated as failed only when it
				+ * returns a value below zero, whereas this function reports its own
				+ * failure as XFS_ERROR(EFSCORRUPTED) and otherwise hands back the value of
				+ * xfs_btree_read_bufl().  If those are positive errnos, a failing recursion
				+ * falls through to "return 0" -- confirm the sign convention.
				+ */
				+STATIC int                                     /* error */
				+xfs_bmap_count_tree(
				+	xfs_mount_t     *mp,            /* file system mount point */
				+	xfs_trans_t     *tp,            /* transaction pointer */
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
				+	xfs_fsblock_t   blockno,	/* file system block number */
				+	int             levelin,	/* level in btree */
				+	int		*count)		/* Count of blocks */
				+{
				+	int			error;
				+	xfs_buf_t		*bp, *nbp;	/* bp: current block; nbp: sibling being counted */
				+	int			level = levelin;
				+	__be64			*pp;
				+	xfs_fsblock_t           bno = blockno;
				+	xfs_fsblock_t		nextbno;
				+	struct xfs_btree_block	*block, *nextblock;
				+	int			numrecs;
				+
				+	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
				+						&xfs_bmbt_buf_ops);
				+	if (error)
				+		return error;
				+	*count += 1;
				+	block = XFS_BUF_TO_BLOCK(bp);
				+
				+	if (--level) {
				+		/* Not at node above leaves, count this level of nodes */
				+		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
				+		while (nextbno != NULLFSBLOCK) {
				+			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
				+						XFS_BMAP_BTREE_REF,
				+						&xfs_bmbt_buf_ops);
				+			if (error)
				+				return error;	/* NOTE(review): bp has not been released on this path -- confirm */
				+			*count += 1;
				+			nextblock = XFS_BUF_TO_BLOCK(nbp);
				+			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
				+			xfs_trans_brelse(tp, nbp);
				+		}
				+
				+		/* Dive to the next level */
				+		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
				+		bno = be64_to_cpu(*pp);
				+		if (unlikely((error =
				+		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
				+			xfs_trans_brelse(tp, bp);
				+			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
				+					 XFS_ERRLEVEL_LOW, mp);
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+		xfs_trans_brelse(tp, bp);
				+	} else {
				+		/* count all level 1 nodes and their leaves */
				+		for (;;) {
				+			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);	/* saved before bp is released */
				+			numrecs = be16_to_cpu(block->bb_numrecs);
				+			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
				+			xfs_trans_brelse(tp, bp);
				+			if (nextbno == NULLFSBLOCK)
				+				break;
				+			bno = nextbno;
				+			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				+						XFS_BMAP_BTREE_REF,
				+						&xfs_bmbt_buf_ops);
				+			if (error)
				+				return error;
				+			*count += 1;
				+			block = XFS_BUF_TO_BLOCK(bp);
				+		}
				+	}
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Count fsblocks of the given fork.
				+ *
				+ * For a fork in XFS_DINODE_FMT_EXTENTS format the block counts of all
				+ * if_bytes / sizeof(xfs_bmbt_rec_t) in-core records are added to *count.
				+ * For any other format the fork is handled as a btree: the first pointer
				+ * of the in-inode root (ifp->if_broot) is followed and
				+ * xfs_bmap_count_tree() does the counting.
				+ *
				+ * *count is accumulated into, not reset, so the caller supplies the
				+ * starting value.  Returns 0 on success, or XFS_ERROR(EFSCORRUPTED) when
				+ * the tree walk is seen to fail.
				+ *
				+ * NOTE(review): the format is not checked against XFS_DINODE_FMT_BTREE
				+ * before if_broot is dereferenced -- presumably callers only pass extent
				+ * or btree format forks; confirm.
				+ */
				+int						/* error */
				+xfs_bmap_count_blocks(
				+	xfs_trans_t		*tp,		/* transaction pointer */
				+	xfs_inode_t		*ip,		/* incore inode */
				+	int			whichfork,	/* data or attr fork */
				+	int			*count)		/* out: count of blocks */
				+{
				+	struct xfs_btree_block	*block;	/* current btree block */
				+	xfs_fsblock_t		bno;	/* block # of "block" */
				+	xfs_ifork_t		*ifp;	/* fork structure */
				+	int			level;	/* btree level, for checking */
				+	xfs_mount_t		*mp;	/* file system mount structure */
				+	__be64			*pp;	/* pointer to block address */
				+
				+	bno = NULLFSBLOCK;
				+	mp = ip->i_mount;
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
				+		xfs_bmap_count_leaves(ifp, 0,
				+			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
				+			count);
				+		return 0;
				+	}
				+
				+	/*
				+	 * Root level must use XFS_BMAP_BROOT_PTR_ADDR macro to get ptr out.
				+	 */
				+	block = ifp->if_broot;
				+	level = be16_to_cpu(block->bb_level);
				+	ASSERT(level > 0);
				+	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
				+	bno = be64_to_cpu(*pp);
				+	ASSERT(bno != NULLDFSBNO);
				+	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
				+	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
				+
				+	/* NOTE(review): only a negative return is treated as failure here -- confirm sign convention */
				+	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
				+		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				+				 mp);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * returns 1 for success, 0 if we failed to map the extent.
				+ *
				+ * Finishes one getbmapx output record whose bmv_offset and bmv_length have
				+ * already been filled in by the caller.
				+ *
				+ * Hole (startblock == HOLESTARTBLOCK): bmv_block is set to -1.  If the file
				+ * is prealloced and the record ends exactly at 'end', its length is trimmed
				+ * to the distance from bmv_offset to the inode size rounded up to a whole
				+ * filesystem block; 0 is returned when nothing is left.
				+ *
				+ * Otherwise: bmv_block is set to -2 for DELAYSTARTBLOCK or to the result of
				+ * xfs_fsb_to_db(), and BMV_OF_LAST is or-ed into bmv_oflags when the
				+ * extent found at bmv_offset is the last record of the fork.
				+ *
				+ * NOTE(review): the last-record lookup always uses XFS_DATA_FORK, while
				+ * xfs_getbmap() also reaches this path when mapping the attribute fork --
				+ * confirm that is intended.
				+ */
				+STATIC int
				+xfs_getbmapx_fix_eof_hole(
				+	xfs_inode_t		*ip,		/* xfs incore inode pointer */
				+	struct getbmapx		*out,		/* output structure */
				+	int			prealloced,	/* this is a file with
				+						 * preallocated data space */
				+	__int64_t		end,		/* last block requested */
				+	xfs_fsblock_t		startblock)
				+{
				+	__int64_t		fixlen;
				+	xfs_mount_t		*mp;		/* file system mount point */
				+	xfs_ifork_t		*ifp;		/* inode fork pointer */
				+	xfs_extnum_t		lastx;		/* last extent pointer */
				+	xfs_fileoff_t		fileblock;
				+
				+	if (startblock == HOLESTARTBLOCK) {
				+		mp = ip->i_mount;
				+		out->bmv_block = -1;
				+		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
				+		fixlen -= out->bmv_offset;
				+		if (prealloced && out->bmv_offset + out->bmv_length == end) {
				+			/* Came to hole at EOF. Trim it. */
				+			if (fixlen <= 0)
				+				return 0;
				+			out->bmv_length = fixlen;
				+		}
				+	} else {
				+		if (startblock == DELAYSTARTBLOCK)
				+			out->bmv_block = -2;
				+		else
				+			out->bmv_block = xfs_fsb_to_db(ip, startblock);
				+		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
				+		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
				+		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
				+		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
				+			out->bmv_oflags |= BMV_OF_LAST;
				+	}
				+
				+	return 1;
				+}
			
 
				+
			
 
				+/*
				+ * Get inode's extents as described in bmv, and format for output.
				+ * Calls formatter to fill the user's buffer until all extents
				+ * are mapped, until the passed-in bmv->bmv_count slots have
				+ * been filled, or until the formatter short-circuits the loop,
				+ * if it is tracking filled-in extents on its own.
				+ *
				+ * bmv is both input and output: bmv_iflags, bmv_offset, bmv_length and
				+ * bmv_count are read; bmv_offset and bmv_length are advanced as extents
				+ * are consumed and bmv_entries is set to the number of records produced.
				+ * A bmv_length of -1 is replaced by the distance from bmv_offset to a
				+ * fork-dependent limit; 0 returns at once with bmv_entries = 0; any other
				+ * negative value is rejected with EINVAL, as is bmv_count < 2.
				+ *
				+ * Records are first gathered into a kernel array while the inode locks are
				+ * held and are handed to formatter() only after both locks are dropped.
				+ *
				+ * NOTE(review): the formatter loop assigns to 'error' on every call, so an
				+ * error from a later xfs_bmapi_read() pass can be replaced by the
				+ * formatter's result when earlier passes already produced records --
				+ * confirm whether that is intended.
				+ */
				+int						/* error code */
				+xfs_getbmap(
				+	xfs_inode_t		*ip,
				+	struct getbmapx		*bmv,		/* user bmap structure */
				+	xfs_bmap_format_t	formatter,	/* format to user */
				+	void			*arg)		/* formatter arg */
				+{
				+	__int64_t		bmvend;		/* last block requested */
				+	int			error = 0;	/* return value */
				+	__int64_t		fixlen;		/* length for -1 case */
				+	int			i;		/* extent number */
				+	int			lock;		/* lock state */
				+	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
				+	xfs_mount_t		*mp;		/* file system mount point */
				+	int			nex;		/* # of user extents can do */
				+	int			nexleft;	/* # of user extents left */
				+	int			subnex;		/* # of bmapi's can do */
				+	int			nmap;		/* number of map entries */
				+	struct getbmapx		*out;		/* output structure */
				+	int			whichfork;	/* data or attr fork */
				+	int			prealloced;	/* this is a file with
				+						 * preallocated data space */
				+	int			iflags;		/* interface flags */
				+	int			bmapi_flags;	/* flags for xfs_bmapi */
				+	int			cur_ext = 0;	/* next free slot in out[] */
				+
				+	mp = ip->i_mount;
				+	iflags = bmv->bmv_iflags;
				+	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
				+
				+	if (whichfork == XFS_ATTR_FORK) {
				+		if (XFS_IFORK_Q(ip)) {
				+			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
				+			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
				+			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
				+				return XFS_ERROR(EINVAL);
				+		} else if (unlikely(
				+			   ip->i_d.di_aformat != 0 &&
				+			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
				+			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
				+					 ip->i_mount);
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+
				+		prealloced = 0;
				+		fixlen = 1LL << 32;
				+	} else {
				+		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
				+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
				+		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
				+			return XFS_ERROR(EINVAL);
				+
				+		if (xfs_get_extsz_hint(ip) ||
				+		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
				+			prealloced = 1;
				+			fixlen = mp->m_super->s_maxbytes;
				+		} else {
				+			prealloced = 0;
				+			fixlen = XFS_ISIZE(ip);
				+		}
				+	}
				+
				+	if (bmv->bmv_length == -1) {
				+		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
				+		bmv->bmv_length =
				+			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
				+	} else if (bmv->bmv_length == 0) {
				+		bmv->bmv_entries = 0;
				+		return 0;
				+	} else if (bmv->bmv_length < 0) {
				+		return XFS_ERROR(EINVAL);
				+	}
				+
				+	nex = bmv->bmv_count - 1;
				+	if (nex <= 0)
				+		return XFS_ERROR(EINVAL);
				+	bmvend = bmv->bmv_offset + bmv->bmv_length;
				+
				+
				+	/* reject counts whose byte size would overflow the multiplication below */
				+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
				+		return XFS_ERROR(ENOMEM);
				+	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
				+	if (!out) {
				+		out = kmem_zalloc_large(bmv->bmv_count *
				+					sizeof(struct getbmapx));
				+		if (!out)
				+			return XFS_ERROR(ENOMEM);
				+	}
				+
				+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
				+	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
				+		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
				+			/* sign is flipped; presumably to match the positive errnos used here -- confirm */
				+			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
				+			if (error)
				+				goto out_unlock_iolock;
				+		}
				+		/*
				+		 * even after flushing the inode, there can still be delalloc
				+		 * blocks on the inode beyond EOF due to speculative
				+		 * preallocation. These are not removed until the release
				+		 * function is called or the inode is inactivated. Hence we
				+		 * cannot assert here that ip->i_delayed_blks == 0.
				+		 */
				+	}
				+
				+	lock = xfs_ilock_map_shared(ip);
				+
				+	/*
				+	 * Don't let nex be bigger than the number of extents
				+	 * we can have assuming alternating holes and real extents.
				+	 */
				+	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
				+		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
				+
				+	bmapi_flags = xfs_bmapi_aflag(whichfork);
				+	if (!(iflags & BMV_IF_PREALLOC))
				+		bmapi_flags |= XFS_BMAPI_IGSTATE;
				+
				+	/*
				+	 * Allocate enough space to handle "subnex" maps at a time.
				+	 */
				+	error = ENOMEM;	/* returned as-is if the allocation below fails */
				+	subnex = 16;
				+	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
				+	if (!map)
				+		goto out_unlock_ilock;
				+
				+	bmv->bmv_entries = 0;
				+
				+	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
				+	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
				+		error = 0;
				+		goto out_free_map;
				+	}
				+
				+	nexleft = nex;
				+
				+	do {
				+		nmap = (nexleft > subnex) ? subnex : nexleft;
				+		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
				+				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
				+				       map, &nmap, bmapi_flags);
				+		if (error)
				+			goto out_free_map;
				+		ASSERT(nmap <= subnex);
				+
				+		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
				+			out[cur_ext].bmv_oflags = 0;
				+			if (map[i].br_state == XFS_EXT_UNWRITTEN)
				+				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
				+			else if (map[i].br_startblock == DELAYSTARTBLOCK)
				+				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
				+			out[cur_ext].bmv_offset =
				+				XFS_FSB_TO_BB(mp, map[i].br_startoff);
				+			out[cur_ext].bmv_length =
				+				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
				+			out[cur_ext].bmv_unused1 = 0;
				+			out[cur_ext].bmv_unused2 = 0;
				+
				+			/*
				+			 * delayed allocation extents that start beyond EOF can
				+			 * occur due to speculative EOF allocation when the
				+			 * delalloc extent is larger than the largest freespace
				+			 * extent at conversion time. These extents cannot be
				+			 * converted by data writeback, so can exist here even
				+			 * if we are not supposed to be finding delalloc
				+			 * extents.
				+			 */
				+			if (map[i].br_startblock == DELAYSTARTBLOCK &&
				+			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
				+				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
				+
				+                        if (map[i].br_startblock == HOLESTARTBLOCK &&
				+			    whichfork == XFS_ATTR_FORK) {
				+				/* came to the end of attribute fork.
				+				 * NOTE(review): cur_ext is not advanced
				+				 * before the jump, so the slot flagged
				+				 * here is never passed to formatter() --
				+				 * confirm.
				+				 */
				+				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
				+				goto out_free_map;
				+			}
				+
				+			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
				+					prealloced, bmvend,
				+					map[i].br_startblock))
				+				goto out_free_map;
				+
				+			/* advance the request past the record just produced */
				+			bmv->bmv_offset =
				+				out[cur_ext].bmv_offset +
				+				out[cur_ext].bmv_length;
				+			bmv->bmv_length =
				+				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
				+
				+			/*
				+			 * In case we don't want to return the hole,
				+			 * don't increase cur_ext so that we can reuse
				+			 * it in the next loop.
				+			 */
				+			if ((iflags & BMV_IF_NO_HOLES) &&
				+			    map[i].br_startblock == HOLESTARTBLOCK) {
				+				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
				+				continue;
				+			}
				+
				+			nexleft--;
				+			bmv->bmv_entries++;
				+			cur_ext++;
				+		}
				+	} while (nmap && nexleft && bmv->bmv_length);
				+
				+ out_free_map:
				+	kmem_free(map);
				+ out_unlock_ilock:
				+	xfs_iunlock_map_shared(ip, lock);
				+ out_unlock_iolock:
				+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
				+
				+	for (i = 0; i < cur_ext; i++) {
				+		int full = 0;	/* user array is full */
				+
				+		/* format results & advance arg */
				+		error = formatter(&arg, &out[i], &full);
				+		if (error || full)
				+			break;
				+	}
				+
				+	/* free with the routine matching whichever allocator succeeded above */
				+	if (is_vmalloc_addr(out))
				+		kmem_free_large(out);
				+	else
				+		kmem_free(out);
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * dead simple method of punching delayed allocation blocks from a range in
				+ * the inode. Walks a block at a time so will be slow, but is only executed in
				+ * rare error cases so the overhead is not critical. This will always punch out
				+ * both the start and end blocks, even if the ranges only partially overlap
				+ * them, so it is up to the caller to ensure that partial blocks are not
				+ * passed in.
				+ *
				+ * The caller must hold XFS_ILOCK_EXCL on ip (asserted below).  Blocks that
				+ * map to nothing, or to anything other than DELAYSTARTBLOCK, are skipped.
				+ * Returns 0, or the first error from xfs_bmapi_read() or xfs_bunmapi(),
				+ * at which point the walk stops.
				+ *
				+ * NOTE(review): the loop is do/while, so one block is always processed and
				+ * 'remaining' is decremented even when length is 0; if xfs_fileoff_t is
				+ * unsigned that would wrap -- presumably callers never pass 0; confirm.
				+ */
				+int
				+xfs_bmap_punch_delalloc_range(
				+	struct xfs_inode	*ip,
				+	xfs_fileoff_t		start_fsb,
				+	xfs_fileoff_t		length)
				+{
				+	xfs_fileoff_t		remaining = length;
				+	int			error = 0;
				+
				+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				+
				+	do {
				+		int		done;
				+		xfs_bmbt_irec_t	imap;
				+		int		nimaps = 1;
				+		xfs_fsblock_t	firstblock;
				+		xfs_bmap_free_t flist;
				+
				+		/*
				+		 * Map the range first and check that it is a delalloc extent
				+		 * before trying to unmap the range. Otherwise we will be
				+		 * trying to remove a real extent (which requires a
				+		 * transaction) or a hole, which is probably a bad idea...
				+		 */
				+		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
				+				       XFS_BMAPI_ENTIRE);
				+
				+		if (error) {
				+			/* something screwed, just bail */
				+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				+				xfs_alert(ip->i_mount,
				+			"Failed delalloc mapping lookup ino %lld fsb %lld.",
				+						ip->i_ino, start_fsb);
				+			}
				+			break;
				+		}
				+		if (!nimaps) {
				+			/* nothing there */
				+			goto next_block;
				+		}
				+		if (imap.br_startblock != DELAYSTARTBLOCK) {
				+			/* been converted, ignore */
				+			goto next_block;
				+		}
				+		WARN_ON(imap.br_blockcount == 0);
				+
				+		/*
				+		 * Note: while we initialise the firstblock/flist pair, they
				+		 * should never be used because blocks should never be
				+		 * allocated or freed for a delalloc extent and hence we
				+		 * don't need to cancel or finish them after the xfs_bunmapi()
				+		 * call.
				+		 */
				+		xfs_bmap_init(&flist, &firstblock);
				+		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
				+					&flist, &done);
				+		if (error)
				+			break;
				+
				+		ASSERT(!flist.xbf_count && !flist.xbf_first);
				+next_block:
				+		start_fsb++;
				+		remaining--;
				+	} while(remaining > 0);
				+
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Test whether it is appropriate to check an inode for and free post EOF
				+ * blocks. The 'force' parameter determines whether we should also consider
				+ * regular files that are marked preallocated or append-only.
				+ *
				+ * Returns false when any of the following holds, true otherwise:
				+ *  - the inode is not a regular file;
				+ *  - the VFS size is 0, VN_CACHED() is 0 and i_delayed_blks is 0;
				+ *  - XFS_IFEXTENTS is not set in the data fork flags;
				+ *  - XFS_DIFLAG_PREALLOC or XFS_DIFLAG_APPEND is set and either 'force' is
				+ *    false or the inode has no delayed blocks.
				+ *
				+ * Only inode fields are read; nothing is modified and no lock is taken
				+ * in this function.
				+ */
				+bool
				+xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
				+{
				+	/* prealloc/delalloc exists only on regular files */
				+	if (!S_ISREG(ip->i_d.di_mode))
				+		return false;
				+
				+	/*
				+	 * Zero sized files with no cached pages and delalloc blocks will not
				+	 * have speculative prealloc/delalloc blocks to remove.
				+	 */
				+	if (VFS_I(ip)->i_size == 0 &&
				+	    VN_CACHED(VFS_I(ip)) == 0 &&
				+	    ip->i_delayed_blks == 0)
				+		return false;
				+
				+	/* If we haven't read in the extent list, then don't do it now. */
				+	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
				+		return false;
				+
				+	/*
				+	 * Do not free real preallocated or append-only files unless the file
				+	 * has delalloc blocks and we are forced to remove them.
				+	 */
				+	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
				+		if (!force || ip->i_delayed_blks == 0)
				+			return false;
				+
				+	return true;
				+}
			
 
				+
			
 
				+/*
				+ * This is called by xfs_inactive to free any blocks beyond eof
				+ * when the link count isn't zero and by xfs_dm_punch_hole() when
				+ * punching a hole to EOF.
				+ *
				+ * The range from the inode size (rounded up to a filesystem block) to
				+ * s_maxbytes is mapped under XFS_ILOCK_SHARED.  If that finds anything
				+ * other than a hole, or the inode has delayed blocks, the data fork is
				+ * truncated back to XFS_ISIZE(ip) inside an XFS_TRANS_INACTIVE transaction
				+ * and, after a successful commit, xfs_inode_clear_eofblocks_tag() is
				+ * called.
				+ *
				+ * need_iolock: when true, XFS_IOLOCK_EXCL is taken with a trylock and
				+ * dropped again before returning; EAGAIN is returned if the trylock fails.
				+ *
				+ * Returns 0 on success or when there is nothing to do, otherwise the error
				+ * from the mapping lookup, dquot attach, reservation, truncate or commit.
				+ */
				+int
				+xfs_free_eofblocks(
				+	xfs_mount_t	*mp,
				+	xfs_inode_t	*ip,
				+	bool		need_iolock)
				+{
				+	xfs_trans_t	*tp;
				+	int		error;
				+	xfs_fileoff_t	end_fsb;	/* inode size, rounded up to a block */
				+	xfs_fileoff_t	last_fsb;	/* s_maxbytes, rounded up to a block */
				+	xfs_filblks_t	map_len;
				+	int		nimaps;
				+	xfs_bmbt_irec_t	imap;
				+
				+	/*
				+	 * Figure out if there are any blocks beyond the end
				+	 * of the file.  If not, then there is nothing to do.
				+	 */
				+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
				+	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
				+	if (last_fsb <= end_fsb)
				+		return 0;
				+	map_len = last_fsb - end_fsb;
				+
				+	nimaps = 1;
				+	xfs_ilock(ip, XFS_ILOCK_SHARED);
				+	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
				+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				+
				+	if (!error && (nimaps != 0) &&
				+	    (imap.br_startblock != HOLESTARTBLOCK ||
				+	     ip->i_delayed_blks)) {
				+		/*
				+		 * Attach the dquots to the inode up front.
				+		 */
				+		error = xfs_qm_dqattach(ip, 0);
				+		if (error)
				+			return error;
				+
				+		/*
				+		 * There are blocks after the end of file.
				+		 * Free them up now by truncating the file to
				+		 * its current size.
				+		 */
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				+
				+		if (need_iolock) {
				+			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				+				xfs_trans_cancel(tp, 0);
				+				return EAGAIN;	/* positive errno, as elsewhere in this function */
				+			}
				+		}
				+
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
				+		if (error) {
				+			ASSERT(XFS_FORCED_SHUTDOWN(mp));
				+			xfs_trans_cancel(tp, 0);
				+			if (need_iolock)
				+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				+			return error;
				+		}
				+
				+		xfs_ilock(ip, XFS_ILOCK_EXCL);
				+		xfs_trans_ijoin(tp, ip, 0);
				+
				+		/*
				+		 * Do not update the on-disk file size.  If we update the
				+		 * on-disk file size and then the system crashes before the
				+		 * contents of the file are flushed to disk then the files
				+		 * may be full of holes (ie NULL files bug).
				+		 */
				+		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
				+					      XFS_ISIZE(ip));
				+		if (error) {
				+			/*
				+			 * If we get an error at this point we simply don't
				+			 * bother truncating the file.
				+			 */
				+			xfs_trans_cancel(tp,
				+					 (XFS_TRANS_RELEASE_LOG_RES |
				+					  XFS_TRANS_ABORT));
				+		} else {
				+			error = xfs_trans_commit(tp,
				+						XFS_TRANS_RELEASE_LOG_RES);
				+			if (!error)
				+				xfs_inode_clear_eofblocks_tag(ip);
				+		}
				+
				+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
				+		if (need_iolock)
				+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				+	}
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * xfs_alloc_file_space()
				+ *      This routine allocates disk space for the given file.
				+ *
				+ *	If alloc_type == 0, this request is for an ALLOCSP type
				+ *	request which will change the file size.  In this case, no
				+ *	DMAPI event will be generated by the call.  A TRUNCATE event
				+ *	will be generated later by xfs_setattr.
				+ *
				+ *	If alloc_type != 0, this request is for a RESVSP type
				+ *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
				+ *	lower block boundary byte address is less than the file's
				+ *	length.
				+ *
				+ *	The byte range [offset, offset + len) is converted to filesystem
				+ *	blocks and allocated in a loop.  Each pass reserves a
				+ *	XFS_TRANS_DIOSTRAT transaction plus quota, calls
				+ *	xfs_bmapi_write() with alloc_type as its flags argument,
				+ *	commits, and then advances by the length of the one extent
				+ *	returned.  A pass that maps nothing ends the loop with ENOSPC.
				+ *
				+ *	len <= 0 is rejected with EINVAL; a forced shutdown yields EIO.
				+ *
				+ *	NOTE(review): attr_flags is not referenced anywhere in the body.
				+ *
				+ * RETURNS:
				+ *       0 on success
				+ *      errno on error
				+ *
				+ */
				+STATIC int
				+xfs_alloc_file_space(
				+	xfs_inode_t		*ip,
				+	xfs_off_t		offset,
				+	xfs_off_t		len,
				+	int			alloc_type,
				+	int			attr_flags)
				+{
				+	xfs_mount_t		*mp = ip->i_mount;
				+	xfs_off_t		count;
				+	xfs_filblks_t		allocated_fsb;
				+	xfs_filblks_t		allocatesize_fsb;
				+	xfs_extlen_t		extsz, temp;
				+	xfs_fileoff_t		startoffset_fsb;
				+	xfs_fsblock_t		firstfsb;
				+	int			nimaps;
				+	int			quota_flag;
				+	int			rt;
				+	xfs_trans_t		*tp;
				+	xfs_bmbt_irec_t		imaps[1], *imapp;
				+	xfs_bmap_free_t		free_list;
				+	uint			qblocks, resblks, resrtextents;
				+	int			committed;
				+	int			error;
				+
				+	trace_xfs_alloc_file_space(ip);
				+
				+	if (XFS_FORCED_SHUTDOWN(mp))
				+		return XFS_ERROR(EIO);
				+
				+	error = xfs_qm_dqattach(ip, 0);
				+	if (error)
				+		return error;
				+
				+	if (len <= 0)
				+		return XFS_ERROR(EINVAL);
				+
				+	rt = XFS_IS_REALTIME_INODE(ip);
				+	extsz = xfs_get_extsz_hint(ip);
				+
				+	count = len;
				+	imapp = &imaps[0];
				+	nimaps = 1;
				+	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
				+	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
				+
				+	/*
				+	 * Allocate file space until done or until there is an error
				+	 */
				+	while (allocatesize_fsb && !error) {
				+		xfs_fileoff_t	s, e;	/* start and end used only to size the reservation */
				+
				+		/*
				+		 * Determine space reservations for data/realtime.
				+		 */
				+		if (unlikely(extsz)) {
				+			s = startoffset_fsb;
				+			do_div(s, extsz);
				+			s *= extsz;
				+			e = startoffset_fsb + allocatesize_fsb;
				+			if ((temp = do_mod(startoffset_fsb, extsz)))
				+				e += temp;
				+			if ((temp = do_mod(e, extsz)))
				+				e += extsz - temp;
				+		} else {
				+			s = 0;
				+			e = allocatesize_fsb;
				+		}
				+
				+		/*
				+		 * The transaction reservation is limited to a 32-bit block
				+		 * count, hence we need to limit the number of blocks we are
				+		 * trying to reserve to avoid an overflow. We can't allocate
				+		 * more than @nimaps extents, and an extent is limited on disk
				+		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
				+		 */
				+		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
				+		if (unlikely(rt)) {
				+			resrtextents = qblocks = resblks;
				+			resrtextents /= mp->m_sb.sb_rextsize;
				+			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
				+			quota_flag = XFS_QMOPT_RES_RTBLKS;
				+		} else {
				+			resrtextents = 0;
				+			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
				+			quota_flag = XFS_QMOPT_RES_REGBLKS;
				+		}
				+
				+		/*
				+		 * Allocate and setup the transaction.
				+		 */
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
				+					  resblks, resrtextents);
				+		/*
				+		 * Check for running out of space
				+		 */
				+		if (error) {
				+			/*
				+			 * Free the transaction structure.
				+			 */
				+			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
				+			xfs_trans_cancel(tp, 0);
				+			break;
				+		}
				+		xfs_ilock(ip, XFS_ILOCK_EXCL);
				+		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
				+						      0, quota_flag);
				+		if (error)
				+			goto error1;
				+
				+		xfs_trans_ijoin(tp, ip, 0);
				+
				+		xfs_bmap_init(&free_list, &firstfsb);
				+		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
				+					allocatesize_fsb, alloc_type, &firstfsb,
				+					0, imapp, &nimaps, &free_list);
				+		if (error) {
				+			goto error0;
				+		}
				+
				+		/*
				+		 * Complete the transaction
				+		 */
				+		error = xfs_bmap_finish(&tp, &free_list, &committed);
				+		if (error) {
				+			goto error0;
				+		}
				+
				+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
				+		if (error) {
				+			break;
				+		}
				+
				+		/* NOTE(review): read before the nimaps check below; unused when nimaps is 0 */
				+		allocated_fsb = imapp->br_blockcount;
				+
				+		if (nimaps == 0) {
				+			error = XFS_ERROR(ENOSPC);
				+			break;
				+		}
				+
				+		startoffset_fsb += allocated_fsb;
				+		allocatesize_fsb -= allocated_fsb;
				+	}
				+
				+	return error;
				+
				+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
				+	xfs_bmap_cancel(&free_list);
				+	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
				+
				+error1:	/* Just cancel transaction */
				+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
				+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				+	return error;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Zero file bytes between startoff and endoff inclusive.
			
 
				+ * The iolock is held exclusive and no blocks are buffered.
			
 
				+ *
			
 
				+ * This function is used by xfs_free_file_space() to zero
			
 
				+ * partial blocks when the range to free is not block aligned.
			
 
				+ * When unreserving space with boundaries that are not block
			
 
				+ * aligned we round up the start and round down the end
			
 
				+ * boundaries and then use this function to zero the parts of
			
 
				+ * the blocks that got dropped during the rounding.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_zero_remaining_bytes(
			
 
				+	xfs_inode_t		*ip,
			
 
				+	xfs_off_t		startoff,
			
 
				+	xfs_off_t		endoff)
			
 
				+{
			
 
				+	xfs_bmbt_irec_t		imap;
			
 
				+	xfs_fileoff_t		offset_fsb;
			
 
				+	xfs_off_t		lastoffset;
			
 
				+	xfs_off_t		offset;
			
 
				+	xfs_buf_t		*bp;
			
 
				+	xfs_mount_t		*mp = ip->i_mount;
			
 
				+	int			nimap;
			
 
				+	int			error = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Avoid doing I/O beyond eof - it's not necessary
			
 
				+	 * since nothing can read beyond eof.  The space will
			
 
				+	 * be zeroed when the file is extended anyway.
			
 
				+	 */
			
 
				+	if (startoff >= XFS_ISIZE(ip))
			
 
				+		return 0;
			
 
				+
			
 
				+	if (endoff > XFS_ISIZE(ip))
			
 
				+		endoff = XFS_ISIZE(ip);
			
 
				+
			
 
				+	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
			
 
				+					mp->m_rtdev_targp : mp->m_ddev_targp,
			
 
				+				  BTOBB(mp->m_sb.sb_blocksize), 0);
			
 
				+	if (!bp)
			
 
				+		return XFS_ERROR(ENOMEM);
			
 
				+
			
 
				+	xfs_buf_unlock(bp);
			
 
				+
			
 
				+	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
			
 
				+		offset_fsb = XFS_B_TO_FSBT(mp, offset);
			
 
				+		nimap = 1;
			
 
				+		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
			
 
				+		if (error || nimap < 1)
			
 
				+			break;
			
 
				+		ASSERT(imap.br_blockcount >= 1);
			
 
				+		ASSERT(imap.br_startoff == offset_fsb);
			
 
				+		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
			
 
				+		if (lastoffset > endoff)
			
 
				+			lastoffset = endoff;
			
 
				+		if (imap.br_startblock == HOLESTARTBLOCK)
			
 
				+			continue;
			
 
				+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			
 
				+		if (imap.br_state == XFS_EXT_UNWRITTEN)
			
 
				+			continue;
			
 
				+		XFS_BUF_UNDONE(bp);
			
 
				+		XFS_BUF_UNWRITE(bp);
			
 
				+		XFS_BUF_READ(bp);
			
 
				+		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
			
 
				+		xfsbdstrat(mp, bp);
			
 
				+		error = xfs_buf_iowait(bp);
			
 
				+		if (error) {
			
 
				+			xfs_buf_ioerror_alert(bp,
			
 
				+					"xfs_zero_remaining_bytes(read)");
			
 
				+			break;
			
 
				+		}
			
 
				+		memset(bp->b_addr +
			
 
				+			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
			
 
				+		      0, lastoffset - offset + 1);
			
 
				+		XFS_BUF_UNDONE(bp);
			
 
				+		XFS_BUF_UNREAD(bp);
			
 
				+		XFS_BUF_WRITE(bp);
			
 
				+		xfsbdstrat(mp, bp);
			
 
				+		error = xfs_buf_iowait(bp);
			
 
				+		if (error) {
			
 
				+			xfs_buf_ioerror_alert(bp,
			
 
				+					"xfs_zero_remaining_bytes(write)");
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+	xfs_buf_free(bp);
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * xfs_free_file_space()
			
 
				+ *      This routine frees disk space for the given file.
			
 
				+ *
			
 
				+ *	This routine is only called by xfs_change_file_space
			
 
				+ *	for an UNRESVSP type call.
			
 
				+ *
			
 
				+ * RETURNS:
			
 
				+ *       0 on success
			
 
				+ *      errno on error
			
 
				+ *
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_free_file_space(
			
 
				+	xfs_inode_t		*ip,
			
 
				+	xfs_off_t		offset,
			
 
				+	xfs_off_t		len,
			
 
				+	int			attr_flags)
			
 
				+{
			
 
				+	int			committed;
			
 
				+	int			done;
			
 
				+	xfs_fileoff_t		endoffset_fsb;
			
 
				+	int			error;
			
 
				+	xfs_fsblock_t		firstfsb;
			
 
				+	xfs_bmap_free_t		free_list;
			
 
				+	xfs_bmbt_irec_t		imap;
			
 
				+	xfs_off_t		ioffset;
			
 
				+	xfs_extlen_t		mod=0;
			
 
				+	xfs_mount_t		*mp;
			
 
				+	int			nimap;
			
 
				+	uint			resblks;
			
 
				+	xfs_off_t		rounding;
			
 
				+	int			rt;
			
 
				+	xfs_fileoff_t		startoffset_fsb;
			
 
				+	xfs_trans_t		*tp;
			
 
				+	int			need_iolock = 1;
			
 
				+
			
 
				+	mp = ip->i_mount;
			
 
				+
			
 
				+	trace_xfs_free_file_space(ip);
			
 
				+
			
 
				+	error = xfs_qm_dqattach(ip, 0);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	error = 0;
			
 
				+	if (len <= 0)	/* if nothing being freed */
			
 
				+		return error;
			
 
				+	rt = XFS_IS_REALTIME_INODE(ip);
			
 
				+	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
			
 
				+	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
			
 
				+
			
 
				+	if (attr_flags & XFS_ATTR_NOLOCK)
			
 
				+		need_iolock = 0;
			
 
				+	if (need_iolock) {
			
 
				+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
			
 
				+		/* wait for the completion of any pending DIOs */
			
 
				+		inode_dio_wait(VFS_I(ip));
			
 
				+	}
			
 
				+
			
 
				+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
			
 
				+	ioffset = offset & ~(rounding - 1);
			
 
				+	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
			
 
				+					      ioffset, -1);
			
 
				+	if (error)
			
 
				+		goto out_unlock_iolock;
			
 
				+	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
			
 
				+
			
 
				+	/*
			
 
				+	 * Need to zero the stuff we're not freeing, on disk.
			
 
				+	 * If it's a realtime file & can't use unwritten extents then we
			
 
				+	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
			
 
				+	 * will take care of it for us.
			
 
				+	 */
			
 
				+	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
			
 
				+		nimap = 1;
			
 
				+		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
			
 
				+					&imap, &nimap, 0);
			
 
				+		if (error)
			
 
				+			goto out_unlock_iolock;
			
 
				+		ASSERT(nimap == 0 || nimap == 1);
			
 
				+		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			
 
				+			xfs_daddr_t	block;
			
 
				+
			
 
				+			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			
 
				+			block = imap.br_startblock;
			
 
				+			mod = do_div(block, mp->m_sb.sb_rextsize);
			
 
				+			if (mod)
			
 
				+				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
			
 
				+		}
			
 
				+		nimap = 1;
			
 
				+		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
			
 
				+					&imap, &nimap, 0);
			
 
				+		if (error)
			
 
				+			goto out_unlock_iolock;
			
 
				+		ASSERT(nimap == 0 || nimap == 1);
			
 
				+		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			
 
				+			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			
 
				+			mod++;
			
 
				+			if (mod && (mod != mp->m_sb.sb_rextsize))
			
 
				+				endoffset_fsb -= mod;
			
 
				+		}
			
 
				+	}
			
 
				+	if ((done = (endoffset_fsb <= startoffset_fsb)))
			
 
				+		/*
			
 
				+		 * One contiguous piece to clear
			
 
				+		 */
			
 
				+		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
			
 
				+	else {
			
 
				+		/*
			
 
				+		 * Some full blocks, possibly two pieces to clear
			
 
				+		 */
			
 
				+		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
			
 
				+			error = xfs_zero_remaining_bytes(ip, offset,
			
 
				+				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
			
 
				+		if (!error &&
			
 
				+		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
			
 
				+			error = xfs_zero_remaining_bytes(ip,
			
 
				+				XFS_FSB_TO_B(mp, endoffset_fsb),
			
 
				+				offset + len - 1);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * free file space until done or until there is an error
			
 
				+	 */
			
 
				+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			
 
				+	while (!error && !done) {
			
 
				+
			
 
				+		/*
			
 
				+		 * allocate and setup the transaction. Allow this
			
 
				+		 * transaction to dip into the reserve blocks to ensure
			
 
				+		 * the freeing of the space succeeds at ENOSPC.
			
 
				+		 */
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
			
 
				+		tp->t_flags |= XFS_TRANS_RESERVE;
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
			
 
				+
			
 
				+		/*
			
 
				+		 * check for running out of space
			
 
				+		 */
			
 
				+		if (error) {
			
 
				+			/*
			
 
				+			 * Free the transaction structure.
			
 
				+			 */
			
 
				+			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			
 
				+			xfs_trans_cancel(tp, 0);
			
 
				+			break;
			
 
				+		}
			
 
				+		xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				+		error = xfs_trans_reserve_quota(tp, mp,
			
 
				+				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
			
 
				+				resblks, 0, XFS_QMOPT_RES_REGBLKS);
			
 
				+		if (error)
			
 
				+			goto error1;
			
 
				+
			
 
				+		xfs_trans_ijoin(tp, ip, 0);
			
 
				+
			
 
				+		/*
			
 
				+		 * issue the bunmapi() call to free the blocks
			
 
				+		 */
			
 
				+		xfs_bmap_init(&free_list, &firstfsb);
			
 
				+		error = xfs_bunmapi(tp, ip, startoffset_fsb,
			
 
				+				  endoffset_fsb - startoffset_fsb,
			
 
				+				  0, 2, &firstfsb, &free_list, &done);
			
 
				+		if (error) {
			
 
				+			goto error0;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * complete the transaction
			
 
				+		 */
			
 
				+		error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				+		if (error) {
			
 
				+			goto error0;
			
 
				+		}
			
 
				+
			
 
				+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
 
				+	}
			
 
				+
			
 
				+ out_unlock_iolock:
			
 
				+	if (need_iolock)
			
 
				+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			
 
				+	return error;
			
 
				+
			
 
				+ error0:
			
 
				+	xfs_bmap_cancel(&free_list);
			
 
				+ error1:
			
 
				+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			
 
				+	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
			
 
				+		    XFS_ILOCK_EXCL);
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+STATIC int
			
 
				+xfs_zero_file_space(
			
 
				+	struct xfs_inode	*ip,
			
 
				+	xfs_off_t		offset,
			
 
				+	xfs_off_t		len,
			
 
				+	int			attr_flags)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = ip->i_mount;
			
 
				+	uint			granularity;
			
 
				+	xfs_off_t		start_boundary;
			
 
				+	xfs_off_t		end_boundary;
			
 
				+	int			error;
			
 
				+
			
 
				+	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
			
 
				+
			
 
				+	/*
			
 
				+	 * Round the range of extents we are going to convert inwards.  If the
			
 
				+	 * offset is aligned, then it doesn't get changed so we zero from the
			
 
				+	 * start of the block offset points to.
			
 
				+	 */
			
 
				+	start_boundary = round_up(offset, granularity);
			
 
				+	end_boundary = round_down(offset + len, granularity);
			
 
				+
			
 
				+	ASSERT(start_boundary >= offset);
			
 
				+	ASSERT(end_boundary <= offset + len);
			
 
				+
			
 
				+	if (!(attr_flags & XFS_ATTR_NOLOCK))
			
 
				+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
			
 
				+
			
 
				+	if (start_boundary < end_boundary - 1) {
			
 
				+		/* punch out the page cache over the conversion range */
			
 
				+		truncate_pagecache_range(VFS_I(ip), start_boundary,
			
 
				+					 end_boundary - 1);
			
 
				+		/* convert the blocks */
			
 
				+		error = xfs_alloc_file_space(ip, start_boundary,
			
 
				+					end_boundary - start_boundary - 1,
			
 
				+					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
			
 
				+					attr_flags);
			
 
				+		if (error)
			
 
				+			goto out_unlock;
			
 
				+
			
 
				+		/* We've handled the interior of the range, now for the edges */
			
 
				+		if (start_boundary != offset)
			
 
				+			error = xfs_iozero(ip, offset, start_boundary - offset);
			
 
				+		if (error)
			
 
				+			goto out_unlock;
			
 
				+
			
 
				+		if (end_boundary != offset + len)
			
 
				+			error = xfs_iozero(ip, end_boundary,
			
 
				+					   offset + len - end_boundary);
			
 
				+
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * It's either a sub-granularity range or the range spanned lies
			
 
				+		 * partially across two adjacent blocks.
			
 
				+		 */
			
 
				+		error = xfs_iozero(ip, offset, len);
			
 
				+	}
			
 
				+
			
 
				+out_unlock:
			
 
				+	if (!(attr_flags & XFS_ATTR_NOLOCK))
			
 
				+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			
 
				+	return error;
			
 
				+
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * xfs_change_file_space()
			
 
				+ *      This routine allocates or frees disk space for the given file.
			
 
				+ *      The user specified parameters are checked for alignment and size
			
 
				+ *      limitations.
			
 
				+ *
			
 
				+ * RETURNS:
			
 
				+ *       0 on success
			
 
				+ *      errno on error
			
 
				+ *
			
 
				+ */
			
 
				+int
			
 
				+xfs_change_file_space(
			
 
				+	xfs_inode_t	*ip,
			
 
				+	int		cmd,
			
 
				+	xfs_flock64_t	*bf,
			
 
				+	xfs_off_t	offset,
			
 
				+	int		attr_flags)
			
 
				+{
			
 
				+	xfs_mount_t	*mp = ip->i_mount;
			
 
				+	int		clrprealloc;
			
 
				+	int		error;
			
 
				+	xfs_fsize_t	fsize;
			
 
				+	int		setprealloc;
			
 
				+	xfs_off_t	startoffset;
			
 
				+	xfs_trans_t	*tp;
			
 
				+	struct iattr	iattr;
			
 
				+
			
 
				+	if (!S_ISREG(ip->i_d.di_mode))
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+
			
 
				+	switch (bf->l_whence) {
			
 
				+	case 0: /*SEEK_SET*/
			
 
				+		break;
			
 
				+	case 1: /*SEEK_CUR*/
			
 
				+		bf->l_start += offset;
			
 
				+		break;
			
 
				+	case 2: /*SEEK_END*/
			
 
				+		bf->l_start += XFS_ISIZE(ip);
			
 
				+		break;
			
 
				+	default:
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * length of <= 0 for resv/unresv/zero is invalid.  length for
			
 
				+	 * alloc/free is ignored completely and we have no idea what userspace
			
 
				+	 * might have set it to, so set it to zero to allow range
			
 
				+	 * checks to pass.
			
 
				+	 */
			
 
				+	switch (cmd) {
			
 
				+	case XFS_IOC_ZERO_RANGE:
			
 
				+	case XFS_IOC_RESVSP:
			
 
				+	case XFS_IOC_RESVSP64:
			
 
				+	case XFS_IOC_UNRESVSP:
			
 
				+	case XFS_IOC_UNRESVSP64:
			
 
				+		if (bf->l_len <= 0)
			
 
				+			return XFS_ERROR(EINVAL);
			
 
				+		break;
			
 
				+	default:
			
 
				+		bf->l_len = 0;
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	if (bf->l_start < 0 ||
			
 
				+	    bf->l_start > mp->m_super->s_maxbytes ||
			
 
				+	    bf->l_start + bf->l_len < 0 ||
			
 
				+	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+
			
 
				+	bf->l_whence = 0;
			
 
				+
			
 
				+	startoffset = bf->l_start;
			
 
				+	fsize = XFS_ISIZE(ip);
			
 
				+
			
 
				+	setprealloc = clrprealloc = 0;
			
 
				+	switch (cmd) {
			
 
				+	case XFS_IOC_ZERO_RANGE:
			
 
				+		error = xfs_zero_file_space(ip, startoffset, bf->l_len,
			
 
				+						attr_flags);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+		setprealloc = 1;
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_IOC_RESVSP:
			
 
				+	case XFS_IOC_RESVSP64:
			
 
				+		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
			
 
				+						XFS_BMAPI_PREALLOC, attr_flags);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+		setprealloc = 1;
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_IOC_UNRESVSP:
			
 
				+	case XFS_IOC_UNRESVSP64:
			
 
				+		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
			
 
				+								attr_flags)))
			
 
				+			return error;
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_IOC_ALLOCSP:
			
 
				+	case XFS_IOC_ALLOCSP64:
			
 
				+	case XFS_IOC_FREESP:
			
 
				+	case XFS_IOC_FREESP64:
			
 
				+		/*
			
 
				+		 * These operations actually do IO when extending the file, but
			
 
				+		 * the allocation is done separately to the zeroing that is
			
 
				+		 * done. This set of operations need to be serialised against
			
 
				+		 * other IO operations, such as truncate and buffered IO. We
			
 
				+		 * need to take the IOLOCK here to serialise the allocation and
			
 
				+		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
			
 
				+		 * truncate, direct IO) from racing against the transient
			
 
				+		 * allocated but not written state we can have here.
			
 
				+		 */
			
 
				+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
			
 
				+		if (startoffset > fsize) {
			
 
				+			error = xfs_alloc_file_space(ip, fsize,
			
 
				+					startoffset - fsize, 0,
			
 
				+					attr_flags | XFS_ATTR_NOLOCK);
			
 
				+			if (error) {
			
 
				+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		iattr.ia_valid = ATTR_SIZE;
			
 
				+		iattr.ia_size = startoffset;
			
 
				+
			
 
				+		error = xfs_setattr_size(ip, &iattr,
			
 
				+					 attr_flags | XFS_ATTR_NOLOCK);
			
 
				+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			
 
				+
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+
			
 
				+		clrprealloc = 1;
			
 
				+		break;
			
 
				+
			
 
				+	default:
			
 
				+		ASSERT(0);
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * update the inode timestamp, mode, and prealloc flag bits
			
 
				+	 */
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
			
 
				+	if (error) {
			
 
				+		xfs_trans_cancel(tp, 0);
			
 
				+		return error;
			
 
				+	}
			
 
				+
			
 
				+	xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	if ((attr_flags & XFS_ATTR_DMI) == 0) {
			
 
				+		ip->i_d.di_mode &= ~S_ISUID;
			
 
				+
			
 
				+		/*
			
 
				+		 * Note that we don't have to worry about mandatory
			
 
				+		 * file locking being disabled here because we only
			
 
				+		 * clear the S_ISGID bit if the Group execute bit is
			
 
				+		 * on, but if it was on then mandatory locking wouldn't
			
 
				+		 * have been enabled.
			
 
				+		 */
			
 
				+		if (ip->i_d.di_mode & S_IXGRP)
			
 
				+			ip->i_d.di_mode &= ~S_ISGID;
			
 
				+
			
 
				+		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+	}
			
 
				+	if (setprealloc)
			
 
				+		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
			
 
				+	else if (clrprealloc)
			
 
				+		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
			
 
				+
			
 
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+	if (attr_flags & XFS_ATTR_SYNC)
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+	return xfs_trans_commit(tp, 0);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * We need to check that the format of the data fork in the temporary inode is
			
 
				+ * valid for the target inode before doing the swap. This is not a problem with
			
 
				+ * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
			
 
				+ * data fork depending on the space the attribute fork is taking so we can get
			
 
				+ * invalid formats on the target inode.
			
 
				+ *
			
 
				+ * E.g. target has space for 7 extents in extent format, temp inode only has
			
 
				+ * space for 6.  If we defragment down to 7 extents, then the tmp format is a
			
 
				+ * btree, but when swapped it needs to be in extent format. Hence we can't just
			
 
				+ * blindly swap data forks on attr2 filesystems.
			
 
				+ *
			
 
				+ * Note that we check the swap in both directions so that we don't end up with
			
 
				+ * a corrupt temporary inode, either.
			
 
				+ *
			
 
				+ * Note that fixing the way xfs_fsr sets up the attribute fork in the source
			
 
				+ * inode will prevent this situation from occurring, so all we do here is
			
 
				+ * reject and log the attempt. Basically we are putting the responsibility on
			
 
				+ * userspace to get this right.
			
 
				+ */
			
 
				+static int
			
 
				+xfs_swap_extents_check_format(
			
 
				+	xfs_inode_t	*ip,	/* target inode */
			
 
				+	xfs_inode_t	*tip)	/* tmp inode */
			
 
				+{
			
 
				+
			
 
				+	/* Should never get a local format */
			
 
				+	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
			
 
				+	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	/*
			
 
				+	 * if the target inode has fewer extents than the temporary inode then
			
 
				+	 * why did userspace call us?
			
 
				+	 */
			
 
				+	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	/*
			
 
				+	 * if the target inode is in extent form and the temp inode is in btree
			
 
				+	 * form then we will end up with the target inode in the wrong format
			
 
				+	 * as we already know there are fewer extents in the temp inode.
			
 
				+	 */
			
 
				+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				+	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	/* Check temp in extent form to max in target */
			
 
				+	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				+	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
			
 
				+			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	/* Check target in extent form to max in temp */
			
 
				+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				+	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
			
 
				+			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are in a btree format, check that the temp root block will fit
			
 
				+	 * in the target and that it has enough extents to be in btree format
			
 
				+	 * in the target.
			
 
				+	 *
			
 
				+	 * Note that we have to be careful to allow btree->extent conversions
			
 
				+	 * (a common defrag case) which will occur when the temp inode is in
			
 
				+	 * extent format...
			
 
				+	 */
			
 
				+	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
			
 
				+		if (XFS_IFORK_BOFF(ip) &&
			
 
				+		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
			
 
				+			return EINVAL;
			
 
				+		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
			
 
				+		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			
 
				+			return EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	/* Reciprocal target->temp btree format checks */
			
 
				+	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
			
 
				+		if (XFS_IFORK_BOFF(tip) &&
			
 
				+		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
			
 
				+			return EINVAL;
			
 
				+		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
			
 
				+		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			
 
				+			return EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_swap_extents(
			
 
				+	xfs_inode_t	*ip,	/* target inode */
			
 
				+	xfs_inode_t	*tip,	/* tmp inode */
			
 
				+	xfs_swapext_t	*sxp)
			
 
				+{
			
 
				+	xfs_mount_t	*mp = ip->i_mount;
			
 
				+	xfs_trans_t	*tp;
			
 
				+	xfs_bstat_t	*sbp = &sxp->sx_stat;
			
 
				+	xfs_ifork_t	*tempifp, *ifp, *tifp;
			
 
				+	int		src_log_flags, target_log_flags;
			
 
				+	int		error = 0;
			
 
				+	int		aforkblks = 0;
			
 
				+	int		taforkblks = 0;
			
 
				+	__uint64_t	tmp;
			
 
				+
			
 
				+	/*
			
 
				+	 * We have no way of updating owner information in the BMBT blocks for
			
 
				+	 * each inode on CRC enabled filesystems, so to avoid corrupting
			
 
				+	 * this metadata we simply don't allow extent swaps to occur.
			
 
				+	 */
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+
			
 
				+	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
			
 
				+	if (!tempifp) {
			
 
				+		error = XFS_ERROR(ENOMEM);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * we have to do two separate lock calls here to keep lockdep
			
 
				+	 * happy. If we try to get all the locks in one call, lockdep will
			
 
				+	 * report false positives when we drop the ILOCK and regain them
			
 
				+	 * below.
			
 
				+	 */
			
 
				+	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
			
 
				+	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/* Verify that both files have the same format */
			
 
				+	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	/* Verify both files are either real-time or non-realtime */
			
 
				+	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
			
 
				+	if (error)
			
 
				+		goto out_unlock;
			
 
				+	truncate_pagecache_range(VFS_I(tip), 0, -1);
			
 
				+
			
 
				+	/* Verify O_DIRECT for ftmp */
			
 
				+	if (VN_CACHED(VFS_I(tip)) != 0) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	/* Verify all data are being swapped */
			
 
				+	if (sxp->sx_offset != 0 ||
			
 
				+	    sxp->sx_length != ip->i_d.di_size ||
			
 
				+	    sxp->sx_length != tip->i_d.di_size) {
			
 
				+		error = XFS_ERROR(EFAULT);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	trace_xfs_swap_extent_before(ip, 0);
			
 
				+	trace_xfs_swap_extent_before(tip, 1);
			
 
				+
			
 
				+	/* check inode formats now that data is flushed */
			
 
				+	error = xfs_swap_extents_check_format(ip, tip);
			
 
				+	if (error) {
			
 
				+		xfs_notice(mp,
			
 
				+		    "%s: inode 0x%llx format is incompatible for exchanging.",
			
 
				+				__func__, ip->i_ino);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Compare the current change & modify times with that
			
 
				+	 * passed in.  If they differ, we abort this swap.
			
 
				+	 * This is the mechanism used to ensure the calling
			
 
				+	 * process that the file was not changed out from
			
 
				+	 * under it.
			
 
				+	 */
			
 
				+	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
			
 
				+	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
			
 
				+	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
			
 
				+	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
			
 
				+		error = XFS_ERROR(EBUSY);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	/* We need to fail if the file is memory mapped.  Once we have tossed
			
 
				+	 * all existing pages, the page fault will have no option
			
 
				+	 * but to go to the filesystem for pages. By making the page fault call
			
 
				+	 * vop_read (or write in the case of autogrow) they block on the iolock
			
 
				+	 * until we have switched the extents.
			
 
				+	 */
			
 
				+	if (VN_MAPPED(VFS_I(ip))) {
			
 
				+		error = XFS_ERROR(EBUSY);
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
 
				+	xfs_iunlock(tip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * There is a race condition here since we gave up the
			
 
				+	 * ilock.  However, the data fork will not change since
			
 
				+	 * we have the iolock (locked for truncation too) so we
			
 
				+	 * are safe.  We don't really care if non-io related
			
 
				+	 * fields change.
			
 
				+	 */
			
 
				+	truncate_pagecache_range(VFS_I(ip), 0, -1);
			
 
				+
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
			
 
				+	if (error) {
			
 
				+		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
			
 
				+		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
			
 
				+		xfs_trans_cancel(tp, 0);
			
 
				+		goto out;
			
 
				+	}
			
 
				+	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * Count the number of extended attribute blocks
			
 
				+	 */
			
 
				+	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
			
 
				+	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
			
 
				+		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
			
 
				+		if (error)
			
 
				+			goto out_trans_cancel;
			
 
				+	}
			
 
				+	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
			
 
				+	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
			
 
				+		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			
 
				+			&taforkblks);
			
 
				+		if (error)
			
 
				+			goto out_trans_cancel;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Swap the data forks of the inodes
			
 
				+	 */
			
 
				+	ifp = &ip->i_df;
			
 
				+	tifp = &tip->i_df;
			
 
				+	*tempifp = *ifp;	/* struct copy */
			
 
				+	*ifp = *tifp;		/* struct copy */
			
 
				+	*tifp = *tempifp;	/* struct copy */
			
 
				+
			
 
				+	/*
			
 
				+	 * Fix the on-disk inode values
			
 
				+	 */
			
 
				+	tmp = (__uint64_t)ip->i_d.di_nblocks;
			
 
				+	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
			
 
				+	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
			
 
				+
			
 
				+	tmp = (__uint64_t) ip->i_d.di_nextents;
			
 
				+	ip->i_d.di_nextents = tip->i_d.di_nextents;
			
 
				+	tip->i_d.di_nextents = tmp;
			
 
				+
			
 
				+	tmp = (__uint64_t) ip->i_d.di_format;
			
 
				+	ip->i_d.di_format = tip->i_d.di_format;
			
 
				+	tip->i_d.di_format = tmp;
			
 
				+
			
 
				+	/*
			
 
				+	 * The extents in the source inode could still contain speculative
			
 
				+	 * preallocation beyond EOF (e.g. the file is open but not modified
			
 
				+	 * while defrag is in progress). In that case, we need to copy over the
			
 
				+	 * number of delalloc blocks the data fork in the source inode is
			
 
				+	 * tracking beyond EOF so that when the fork is truncated away when the
			
 
				+	 * temporary inode is unlinked we don't underrun the i_delayed_blks
			
 
				+	 * counter on that inode.
			
 
				+	 */
			
 
				+	ASSERT(tip->i_delayed_blks == 0);
			
 
				+	tip->i_delayed_blks = ip->i_delayed_blks;
			
 
				+	ip->i_delayed_blks = 0;
			
 
				+
			
 
				+	src_log_flags = XFS_ILOG_CORE;
			
 
				+	switch (ip->i_d.di_format) {
			
 
				+	case XFS_DINODE_FMT_EXTENTS:
			
 
				+		/* If the extents fit in the inode, fix the
			
 
				+		 * pointer.  Otherwise it's already NULL or
			
 
				+		 * pointing to the extent.
			
 
				+		 */
			
 
				+		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			
 
				+			ifp->if_u1.if_extents =
			
 
				+				ifp->if_u2.if_inline_ext;
			
 
				+		}
			
 
				+		src_log_flags |= XFS_ILOG_DEXT;
			
 
				+		break;
			
 
				+	case XFS_DINODE_FMT_BTREE:
			
 
				+		src_log_flags |= XFS_ILOG_DBROOT;
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	target_log_flags = XFS_ILOG_CORE;
			
 
				+	switch (tip->i_d.di_format) {
			
 
				+	case XFS_DINODE_FMT_EXTENTS:
			
 
				+		/* If the extents fit in the inode, fix the
			
 
				+		 * pointer.  Otherwise it's already NULL or
			
 
				+		 * pointing to the extent.
			
 
				+		 */
			
 
				+		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			
 
				+			tifp->if_u1.if_extents =
			
 
				+				tifp->if_u2.if_inline_ext;
			
 
				+		}
			
 
				+		target_log_flags |= XFS_ILOG_DEXT;
			
 
				+		break;
			
 
				+	case XFS_DINODE_FMT_BTREE:
			
 
				+		target_log_flags |= XFS_ILOG_DBROOT;
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				+
			
 
				+	xfs_trans_log_inode(tp, ip,  src_log_flags);
			
 
				+	xfs_trans_log_inode(tp, tip, target_log_flags);
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is a synchronous mount, make sure that the
			
 
				+	 * transaction goes to disk before returning to the user.
			
 
				+	 */
			
 
				+	if (mp->m_flags & XFS_MOUNT_WSYNC)
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+
			
 
				+	error = xfs_trans_commit(tp, 0);
			
 
				+
			
 
				+	trace_xfs_swap_extent_after(ip, 0);
			
 
				+	trace_xfs_swap_extent_after(tip, 1);
			
 
				+out:
			
 
				+	kmem_free(tempifp);
			
 
				+	return error;
			
 
				+
			
 
				+out_unlock:
			
 
				+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				+	goto out;
			
 
				+
			
 
				+out_trans_cancel:
			
 
				+	xfs_trans_cancel(tp, 0);
			
 
				+	goto out_unlock;
			
 
				+}
			
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -0,0 +1,110 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef __XFS_BMAP_UTIL_H__
			
 
				+#define	__XFS_BMAP_UTIL_H__
			
 
				+
			
 
				+/* Kernel only BMAP related definitions and functions */
			
 
				+
			
 
				+struct xfs_bmbt_irec;
			
 
				+struct xfs_bmap_free_item;
			
 
				+struct xfs_ifork;
			
 
				+struct xfs_inode;
			
 
				+struct xfs_mount;
			
 
				+struct xfs_trans;
			
 
				+
			
 
				+/*
			
 
				+ * Argument structure for xfs_bmap_alloc.
			
 
				+ */
			
 
				+struct xfs_bmalloca {
			
 
				+	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
			
 
				+	struct xfs_bmap_free	*flist;	/* bmap freelist */
			
 
				+	struct xfs_trans	*tp;	/* transaction pointer */
			
 
				+	struct xfs_inode	*ip;	/* incore inode pointer */
			
 
				+	struct xfs_bmbt_irec	prev;	/* extent before the new one */
			
 
				+	struct xfs_bmbt_irec	got;	/* extent after, or delayed */
			
 
				+
			
 
				+	xfs_fileoff_t		offset;	/* offset in file filling in */
			
 
				+	xfs_extlen_t		length;	/* i/o length asked/allocated */
			
 
				+	xfs_fsblock_t		blkno;	/* starting block of new extent */
			
 
				+
			
 
				+	struct xfs_btree_cur	*cur;	/* btree cursor */
			
 
				+	xfs_extnum_t		idx;	/* current extent index */
			
 
				+	int			nallocs;/* number of extents alloc'd */
			
 
				+	int			logflags;/* flags for transaction logging */
			
 
				+
			
 
				+	xfs_extlen_t		total;	/* total blocks needed for xaction */
			
 
				+	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
			
 
				+	xfs_extlen_t		minleft; /* amount must be left after alloc */
			
 
				+	char			eof;	/* set if allocating past last extent */
			
 
				+	char			wasdel;	/* replacing a delayed allocation */
			
 
				+	char			userdata;/* set if is user data */
			
 
				+	char			aeof;	/* allocated space at eof */
			
 
				+	char			conv;	/* overwriting unwritten extents */
			
 
				+	char			stack_switch;
			
 
				+	int			flags;
			
 
				+	struct completion	*done;
			
 
				+	struct work_struct	work;
			
 
				+	int			result;
			
 
				+};
			
 
				+
			
 
				+int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
			
 
				+			int *committed);
			
 
				+int	xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
			
 
				+int	xfs_bmapi_allocate(struct xfs_bmalloca *args);
			
 
				+int	__xfs_bmapi_allocate(struct xfs_bmalloca *args);
			
 
				+int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
			
 
				+		     int whichfork, int *eof);
			
 
				+int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
			
 
				+			      int whichfork, int *count);
			
 
				+int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
			
 
				+		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
			
 
				+
			
 
				+/* bmap to userspace formatter - copy to user & advance pointer */
			
 
				+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
			
 
				+int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
			
 
				+		xfs_bmap_format_t formatter, void *arg);
			
 
				+
			
 
				+/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
			
 
				+void	xfs_bmap_del_free(struct xfs_bmap_free *flist,
			
 
				+			  struct xfs_bmap_free_item *prev,
			
 
				+			  struct xfs_bmap_free_item *free);
			
 
				+int	xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
			
 
				+			       struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
			
 
				+			       int rt, int eof, int delay, int convert,
			
 
				+			       xfs_fileoff_t *offp, xfs_extlen_t *lenp);
			
 
				+void	xfs_bmap_adjacent(struct xfs_bmalloca *ap);
			
 
				+int	xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
			
 
				+			     int whichfork, struct xfs_bmbt_irec *rec,
			
 
				+			     int *is_empty);
			
 
				+
			
 
				+/* preallocation and hole punch interface */
			
 
				+int	xfs_change_file_space(struct xfs_inode *ip, int cmd,
			
 
				+			      xfs_flock64_t *bf, xfs_off_t offset,
			
 
				+			      int attr_flags);
			
 
				+
			
 
				+/* EOF block manipulation functions */
			
 
				+bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
			
 
				+int	xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
			
 
				+			   bool need_iolock);
			
 
				+
			
 
				+int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
			
 
				+			 struct xfs_swapext *sx);
			
 
				+
			
 
				+xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
			
 
				+
			
 
				+#endif	/* __XFS_BMAP_UTIL_H__ */
			
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -510,7 +510,7 @@ xfs_btree_ptr_addr(
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Get a the root block which is stored in the inode.
			
 
				+ * Get the root block which is stored in the inode.
			
 
				  *
			
 
				  * For now this btree implementation assumes the btree root is always
			
 
				  * stored in the if_broot field of an inode fork.
			
@@ -978,6 +978,7 @@ xfs_btree_init_block_int(
 
				 			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			
 
				 			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
			
 
				 			buf->bb_u.l.bb_pad = 0;
			
 
				+			buf->bb_u.l.bb_lsn = 0;
			
 
				 		}
			
 
				 	} else {
			
 
				 		/* owner is a 32 bit value on short blocks */
			
@@ -989,6 +990,7 @@ xfs_btree_init_block_int(
 
				 			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			
 
				 			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			
 
				 			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
			
 
				+			buf->bb_u.s.bb_lsn = 0;
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -1684,7 +1686,7 @@ xfs_lookup_get_search_key(
 
				 
			
 
				 /*
			
 
				  * Lookup the record.  The cursor is made to point to it, based on dir.
			
 
				- * Return 0 if can't find any such record, 1 for success.
			
 
				+ * stat is set to 0 if can't find any such record, 1 for success.
			
 
				  */
			
 
				 int					/* error */
			
 
				 xfs_btree_lookup(
			
@@ -2756,7 +2758,6 @@ xfs_btree_make_block_unfull(
 
				 
			
 
				 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			
 
				 			/* A root block that can be made bigger. */
			
 
				-
			
 
				 			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			
 
				 		} else {
			
 
				 			/* A root block that needs replacing */
			
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -88,13 +88,11 @@ struct xfs_btree_block {
 
				 #define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40)
			
 
				 #define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48)
			
 
				 
			
 
				-
			
 
				 #define XFS_BTREE_SBLOCK_CRC_OFF \
			
 
				 	offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
			
 
				 #define XFS_BTREE_LBLOCK_CRC_OFF \
			
 
				 	offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
			
 
				 
			
 
				-
			
 
				 /*
			
 
				  * Generic key, ptr and record wrapper structures.
			
 
				  *
			
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -35,6 +35,7 @@
 
				 #include <linux/freezer.h>
			
 
				 
			
 
				 #include "xfs_sb.h"
			
 
				+#include "xfs_trans_resv.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
@@ -303,7 +304,7 @@ _xfs_buf_free_pages(
 
				  *	Releases the specified buffer.
			
 
				  *
			
 
				  * 	The modification state of any associated pages is left unchanged.
			
 
				- * 	The buffer most not be on any hash - use xfs_buf_rele instead for
			
 
				+ * 	The buffer must not be on any hash - use xfs_buf_rele instead for
			
 
				  * 	hashed and refcounted buffers
			
 
				  */
			
 
				 void
			
@@ -1621,7 +1622,7 @@ xfs_setsize_buftarg_flags(
 
				 /*
			
 
				  *	When allocating the initial buffer target we have not yet
			
 
				  *	read in the superblock, so don't know what sized sectors
			
 
				- *	are being used is at this early stage.  Play safe.
			
 
				+ *	are being used at this early stage.  Play safe.
			
 
				  */
			
 
				 STATIC int
			
 
				 xfs_setsize_buftarg_early(
			
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -39,6 +39,14 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
 
				 
			
 
				 STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
			
 
				 
			
 
				+static inline int
			
 
				+xfs_buf_log_format_size(
			
 
				+	struct xfs_buf_log_format *blfp)
			
 
				+{
			
 
				+	return offsetof(struct xfs_buf_log_format, blf_data_map) +
			
 
				+			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * This returns the number of log iovecs needed to log the
			
 
				  * given buf log item.
			
@@ -49,25 +57,27 @@ STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
 
				  *
			
 
				  * If the XFS_BLI_STALE flag has been set, then log nothing.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_buf_item_size_segment(
			
 
				 	struct xfs_buf_log_item	*bip,
			
 
				-	struct xfs_buf_log_format *blfp)
			
 
				+	struct xfs_buf_log_format *blfp,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				 	struct xfs_buf		*bp = bip->bli_buf;
			
 
				-	uint			nvecs;
			
 
				 	int			next_bit;
			
 
				 	int			last_bit;
			
 
				 
			
 
				 	last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
			
 
				 	if (last_bit == -1)
			
 
				-		return 0;
			
 
				+		return;
			
 
				 
			
 
				 	/*
			
 
				 	 * initial count for a dirty buffer is 2 vectors - the format structure
			
 
				 	 * and the first dirty region.
			
 
				 	 */
			
 
				-	nvecs = 2;
			
 
				+	*nvecs += 2;
			
 
				+	*nbytes += xfs_buf_log_format_size(blfp) + XFS_BLF_CHUNK;
			
 
				 
			
 
				 	while (last_bit != -1) {
			
 
				 		/*
			
@@ -87,18 +97,17 @@ xfs_buf_item_size_segment(
 
				 			break;
			
 
				 		} else if (next_bit != last_bit + 1) {
			
 
				 			last_bit = next_bit;
			
 
				-			nvecs++;
			
 
				+			(*nvecs)++;
			
 
				 		} else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) !=
			
 
				 			   (xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) +
			
 
				 			    XFS_BLF_CHUNK)) {
			
 
				 			last_bit = next_bit;
			
 
				-			nvecs++;
			
 
				+			(*nvecs)++;
			
 
				 		} else {
			
 
				 			last_bit++;
			
 
				 		}
			
 
				+		*nbytes += XFS_BLF_CHUNK;
			
 
				 	}
			
 
				-
			
 
				-	return nvecs;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -118,12 +127,13 @@ xfs_buf_item_size_segment(
 
				  * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
			
 
				  * format structures.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_buf_item_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
			
 
				-	uint			nvecs;
			
 
				 	int			i;
			
 
				 
			
 
				 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
			
@@ -135,7 +145,11 @@ xfs_buf_item_size(
 
				 		 */
			
 
				 		trace_xfs_buf_item_size_stale(bip);
			
 
				 		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
			
 
				-		return bip->bli_format_count;
			
 
				+		*nvecs += bip->bli_format_count;
			
 
				+		for (i = 0; i < bip->bli_format_count; i++) {
			
 
				+			*nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]);
			
 
				+		}
			
 
				+		return;
			
 
				 	}
			
 
				 
			
 
				 	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
			
@@ -147,7 +161,8 @@ xfs_buf_item_size(
 
				 		 * commit, so no vectors are used at all.
			
 
				 		 */
			
 
				 		trace_xfs_buf_item_size_ordered(bip);
			
 
				-		return XFS_LOG_VEC_ORDERED;
			
 
				+		*nvecs = XFS_LOG_VEC_ORDERED;
			
 
				+		return;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -159,13 +174,11 @@ xfs_buf_item_size(
 
				 	 * count for the extra buf log format structure that will need to be
			
 
				 	 * written.
			
 
				 	 */
			
 
				-	nvecs = 0;
			
 
				 	for (i = 0; i < bip->bli_format_count; i++) {
			
 
				-		nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
			
 
				+		xfs_buf_item_size_segment(bip, &bip->bli_formats[i],
			
 
				+					  nvecs, nbytes);
			
 
				 	}
			
 
				-
			
 
				 	trace_xfs_buf_item_size(bip);
			
 
				-	return nvecs;
			
 
				 }
			
 
				 
			
 
				 static struct xfs_log_iovec *
			
@@ -192,8 +205,7 @@ xfs_buf_item_format_segment(
 
				 	 * the actual size of the dirty bitmap rather than the size of the in
			
 
				 	 * memory structure.
			
 
				 	 */
			
 
				-	base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
			
 
				-			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
			
 
				+	base_size = xfs_buf_log_format_size(blfp);
			
 
				 
			
 
				 	nvecs = 0;
			
 
				 	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
			
@@ -601,11 +613,9 @@ xfs_buf_item_unlock(
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	if (clean)
			
 
				-		xfs_buf_item_relse(bp);
			
 
				-	else if (aborted) {
			
 
				+	if (clean || aborted) {
			
 
				 		if (atomic_dec_and_test(&bip->bli_refcount)) {
			
 
				-			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
			
 
				+			ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp));
			
 
				 			xfs_buf_item_relse(bp);
			
 
				 		}
			
 
				 	} else
			
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -18,101 +18,9 @@
 
				 #ifndef	__XFS_BUF_ITEM_H__
			
 
				 #define	__XFS_BUF_ITEM_H__
			
 
				 
			
 
				-extern kmem_zone_t	*xfs_buf_item_zone;
			
 
				-
			
 
				-/*
			
 
				- * This flag indicates that the buffer contains on disk inodes
			
 
				- * and requires special recovery handling.
			
 
				- */
			
 
				-#define	XFS_BLF_INODE_BUF	(1<<0)
			
 
				-/*
			
 
				- * This flag indicates that the buffer should not be replayed
			
 
				- * during recovery because its blocks are being freed.
			
 
				- */
			
 
				-#define	XFS_BLF_CANCEL		(1<<1)
			
 
				-
			
 
				-/*
			
 
				- * This flag indicates that the buffer contains on disk
			
 
				- * user or group dquots and may require special recovery handling.
			
 
				- */
			
 
				-#define	XFS_BLF_UDQUOT_BUF	(1<<2)
			
 
				-#define XFS_BLF_PDQUOT_BUF	(1<<3)
			
 
				-#define	XFS_BLF_GDQUOT_BUF	(1<<4)
			
 
				-
			
 
				-#define	XFS_BLF_CHUNK		128
			
 
				-#define	XFS_BLF_SHIFT		7
			
 
				-#define	BIT_TO_WORD_SHIFT	5
			
 
				-#define	NBWORD			(NBBY * sizeof(unsigned int))
			
 
				-
			
 
				-/*
			
 
				- * This is the structure used to lay out a buf log item in the
			
 
				- * log.  The data map describes which 128 byte chunks of the buffer
			
 
				- * have been logged.
			
 
				- */
			
 
				-#define XFS_BLF_DATAMAP_SIZE	((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
			
 
				+/* kernel only definitions */
			
 
				 
			
 
				-typedef struct xfs_buf_log_format {
			
 
				-	unsigned short	blf_type;	/* buf log item type indicator */
			
 
				-	unsigned short	blf_size;	/* size of this item */
			
 
				-	ushort		blf_flags;	/* misc state */
			
 
				-	ushort		blf_len;	/* number of blocks in this buf */
			
 
				-	__int64_t	blf_blkno;	/* starting blkno of this buf */
			
 
				-	unsigned int	blf_map_size;	/* used size of data bitmap in words */
			
 
				-	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
			
 
				-} xfs_buf_log_format_t;
			
 
				-
			
 
				-/*
			
 
				- * All buffers now need to tell recovery where the magic number
			
 
				- * is so that it can verify and calculate the CRCs on the buffer correctly
			
 
				- * once the changes have been replayed into the buffer.
			
 
				- *
			
 
				- * The type value is held in the upper 5 bits of the blf_flags field, which is
			
 
				- * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
			
 
				- */
			
 
				-#define XFS_BLFT_BITS	5
			
 
				-#define XFS_BLFT_SHIFT	11
			
 
				-#define XFS_BLFT_MASK	(((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
			
 
				-
			
 
				-enum xfs_blft {
			
 
				-	XFS_BLFT_UNKNOWN_BUF = 0,
			
 
				-	XFS_BLFT_UDQUOT_BUF,
			
 
				-	XFS_BLFT_PDQUOT_BUF,
			
 
				-	XFS_BLFT_GDQUOT_BUF,
			
 
				-	XFS_BLFT_BTREE_BUF,
			
 
				-	XFS_BLFT_AGF_BUF,
			
 
				-	XFS_BLFT_AGFL_BUF,
			
 
				-	XFS_BLFT_AGI_BUF,
			
 
				-	XFS_BLFT_DINO_BUF,
			
 
				-	XFS_BLFT_SYMLINK_BUF,
			
 
				-	XFS_BLFT_DIR_BLOCK_BUF,
			
 
				-	XFS_BLFT_DIR_DATA_BUF,
			
 
				-	XFS_BLFT_DIR_FREE_BUF,
			
 
				-	XFS_BLFT_DIR_LEAF1_BUF,
			
 
				-	XFS_BLFT_DIR_LEAFN_BUF,
			
 
				-	XFS_BLFT_DA_NODE_BUF,
			
 
				-	XFS_BLFT_ATTR_LEAF_BUF,
			
 
				-	XFS_BLFT_ATTR_RMT_BUF,
			
 
				-	XFS_BLFT_SB_BUF,
			
 
				-	XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
			
 
				-};
			
 
				-
			
 
				-static inline void
			
 
				-xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
			
 
				-{
			
 
				-	ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
			
 
				-	blf->blf_flags &= ~XFS_BLFT_MASK;
			
 
				-	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
			
 
				-}
			
 
				-
			
 
				-static inline __uint16_t
			
 
				-xfs_blft_from_flags(struct xfs_buf_log_format *blf)
			
 
				-{
			
 
				-	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * buf log item flags
			
 
				- */
			
 
				+/* buf log item flags */
			
 
				 #define	XFS_BLI_HOLD		0x01
			
 
				 #define	XFS_BLI_DIRTY		0x02
			
 
				 #define	XFS_BLI_STALE		0x04
			
@@ -133,8 +41,6 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
 
				 	{ XFS_BLI_ORDERED,	"ORDERED" }
			
 
				 
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				-
			
 
				 struct xfs_buf;
			
 
				 struct xfs_mount;
			
 
				 struct xfs_buf_log_item;
			
@@ -169,6 +75,6 @@ void	xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
 
				 			       enum xfs_blft);
			
 
				 void	xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				+extern kmem_zone_t	*xfs_buf_item_zone;
			
 
				 
			
 
				 #endif	/* __XFS_BUF_ITEM_H__ */
			
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -27,8 +27,8 @@
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
@@ -399,7 +399,7 @@ xfs_da3_split(
 
				 	struct xfs_da_intnode	*node;
			
 
				 	struct xfs_buf		*bp;
			
 
				 	int			max;
			
 
				-	int			action;
			
 
				+	int			action = 0;
			
 
				 	int			error;
			
 
				 	int			i;
			
 
				 
			
@@ -2454,9 +2454,9 @@ static int
 
				 xfs_buf_map_from_irec(
			
 
				 	struct xfs_mount	*mp,
			
 
				 	struct xfs_buf_map	**mapp,
			
 
				-	unsigned int		*nmaps,
			
 
				+	int			*nmaps,
			
 
				 	struct xfs_bmbt_irec	*irecs,
			
 
				-	unsigned int		nirecs)
			
 
				+	int			nirecs)
			
 
				 {
			
 
				 	struct xfs_buf_map	*map;
			
 
				 	int			i;
			
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -133,12 +133,19 @@ extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to,
 
				 				     struct xfs_da3_icnode_hdr *from);
			
 
				 
			
 
				 static inline int
			
 
				-xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
			
 
				+__xfs_da3_node_hdr_size(bool v3)
			
 
				 {
			
 
				-	if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC))
			
 
				+	if (v3)
			
 
				 		return sizeof(struct xfs_da3_node_hdr);
			
 
				 	return sizeof(struct xfs_da_node_hdr);
			
 
				 }
			
 
				+static inline int
			
 
				+xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
			
 
				+{
			
 
				+	bool	v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC);
			
 
				+
			
 
				+	return __xfs_da3_node_hdr_size(v3);
			
 
				+}
			
 
				 
			
 
				 static inline struct xfs_da_node_entry *
			
 
				 xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
			
@@ -176,6 +183,7 @@ enum xfs_dacmp {
 
				 typedef struct xfs_da_args {
			
 
				 	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
			
 
				 	int		namelen;	/* length of string (maybe no NULL) */
			
 
				+	__uint8_t	filetype;	/* filetype of inode for directories */
			
 
				 	__uint8_t	*value;		/* set of bytes (maybe contain NULLs) */
			
 
				 	int		valuelen;	/* length of value */
			
 
				 	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
			
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -1,459 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				- * All Rights Reserved.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or
			
 
				- * modify it under the terms of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it would be useful,
			
 
				- * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				- * GNU General Public License for more details.
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public License
			
 
				- * along with this program; if not, write the Free Software Foundation,
			
 
				- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				- */
			
 
				-#include "xfs.h"
			
 
				-#include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				-#include "xfs_log.h"
			
 
				-#include "xfs_trans.h"
			
 
				-#include "xfs_sb.h"
			
 
				-#include "xfs_ag.h"
			
 
				-#include "xfs_mount.h"
			
 
				-#include "xfs_bmap_btree.h"
			
 
				-#include "xfs_alloc_btree.h"
			
 
				-#include "xfs_ialloc_btree.h"
			
 
				-#include "xfs_btree.h"
			
 
				-#include "xfs_dinode.h"
			
 
				-#include "xfs_inode.h"
			
 
				-#include "xfs_inode_item.h"
			
 
				-#include "xfs_bmap.h"
			
 
				-#include "xfs_itable.h"
			
 
				-#include "xfs_dfrag.h"
			
 
				-#include "xfs_error.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				-#include "xfs_trace.h"
			
 
				-
			
 
				-
			
 
				-static int xfs_swap_extents(
			
 
				-	xfs_inode_t	*ip,	/* target inode */
			
 
				-	xfs_inode_t	*tip,	/* tmp inode */
			
 
				-	xfs_swapext_t	*sxp);
			
 
				-
			
 
				-/*
			
 
				- * ioctl interface for swapext
			
 
				- */
			
 
				-int
			
 
				-xfs_swapext(
			
 
				-	xfs_swapext_t	*sxp)
			
 
				-{
			
 
				-	xfs_inode_t     *ip, *tip;
			
 
				-	struct fd	f, tmp;
			
 
				-	int		error = 0;
			
 
				-
			
 
				-	/* Pull information for the target fd */
			
 
				-	f = fdget((int)sxp->sx_fdtarget);
			
 
				-	if (!f.file) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	if (!(f.file->f_mode & FMODE_WRITE) ||
			
 
				-	    !(f.file->f_mode & FMODE_READ) ||
			
 
				-	    (f.file->f_flags & O_APPEND)) {
			
 
				-		error = XFS_ERROR(EBADF);
			
 
				-		goto out_put_file;
			
 
				-	}
			
 
				-
			
 
				-	tmp = fdget((int)sxp->sx_fdtmp);
			
 
				-	if (!tmp.file) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_put_file;
			
 
				-	}
			
 
				-
			
 
				-	if (!(tmp.file->f_mode & FMODE_WRITE) ||
			
 
				-	    !(tmp.file->f_mode & FMODE_READ) ||
			
 
				-	    (tmp.file->f_flags & O_APPEND)) {
			
 
				-		error = XFS_ERROR(EBADF);
			
 
				-		goto out_put_tmp_file;
			
 
				-	}
			
 
				-
			
 
				-	if (IS_SWAPFILE(file_inode(f.file)) ||
			
 
				-	    IS_SWAPFILE(file_inode(tmp.file))) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_put_tmp_file;
			
 
				-	}
			
 
				-
			
 
				-	ip = XFS_I(file_inode(f.file));
			
 
				-	tip = XFS_I(file_inode(tmp.file));
			
 
				-
			
 
				-	if (ip->i_mount != tip->i_mount) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_put_tmp_file;
			
 
				-	}
			
 
				-
			
 
				-	if (ip->i_ino == tip->i_ino) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_put_tmp_file;
			
 
				-	}
			
 
				-
			
 
				-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
			
 
				-		error = XFS_ERROR(EIO);
			
 
				-		goto out_put_tmp_file;
			
 
				-	}
			
 
				-
			
 
				-	error = xfs_swap_extents(ip, tip, sxp);
			
 
				-
			
 
				- out_put_tmp_file:
			
 
				-	fdput(tmp);
			
 
				- out_put_file:
			
 
				-	fdput(f);
			
 
				- out:
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * We need to check that the format of the data fork in the temporary inode is
			
 
				- * valid for the target inode before doing the swap. This is not a problem with
			
 
				- * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
			
 
				- * data fork depending on the space the attribute fork is taking so we can get
			
 
				- * invalid formats on the target inode.
			
 
				- *
			
 
				- * E.g. target has space for 7 extents in extent format, temp inode only has
			
 
				- * space for 6.  If we defragment down to 7 extents, then the tmp format is a
			
 
				- * btree, but when swapped it needs to be in extent format. Hence we can't just
			
 
				- * blindly swap data forks on attr2 filesystems.
			
 
				- *
			
 
				- * Note that we check the swap in both directions so that we don't end up with
			
 
				- * a corrupt temporary inode, either.
			
 
				- *
			
 
				- * Note that fixing the way xfs_fsr sets up the attribute fork in the source
			
 
				- * inode will prevent this situation from occurring, so all we do here is
			
 
				- * reject and log the attempt. basically we are putting the responsibility on
			
 
				- * userspace to get this right.
			
 
				- */
			
 
				-static int
			
 
				-xfs_swap_extents_check_format(
			
 
				-	xfs_inode_t	*ip,	/* target inode */
			
 
				-	xfs_inode_t	*tip)	/* tmp inode */
			
 
				-{
			
 
				-
			
 
				-	/* Should never get a local format */
			
 
				-	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
			
 
				-	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
			
 
				-		return EINVAL;
			
 
				-
			
 
				-	/*
			
 
				-	 * if the target inode has less extents that then temporary inode then
			
 
				-	 * why did userspace call us?
			
 
				-	 */
			
 
				-	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
			
 
				-		return EINVAL;
			
 
				-
			
 
				-	/*
			
 
				-	 * if the target inode is in extent form and the temp inode is in btree
			
 
				-	 * form then we will end up with the target inode in the wrong format
			
 
				-	 * as we already know there are less extents in the temp inode.
			
 
				-	 */
			
 
				-	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				-	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
			
 
				-		return EINVAL;
			
 
				-
			
 
				-	/* Check temp in extent form to max in target */
			
 
				-	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				-	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
			
 
				-			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			
 
				-		return EINVAL;
			
 
				-
			
 
				-	/* Check target in extent form to max in temp */
			
 
				-	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
			
 
				-	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
			
 
				-			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			
 
				-		return EINVAL;
			
 
				-
			
 
				-	/*
			
 
				-	 * If we are in a btree format, check that the temp root block will fit
			
 
				-	 * in the target and that it has enough extents to be in btree format
			
 
				-	 * in the target.
			
 
				-	 *
			
 
				-	 * Note that we have to be careful to allow btree->extent conversions
			
 
				-	 * (a common defrag case) which will occur when the temp inode is in
			
 
				-	 * extent format...
			
 
				-	 */
			
 
				-	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
			
 
				-		if (XFS_IFORK_BOFF(ip) &&
			
 
				-		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
			
 
				-			return EINVAL;
			
 
				-		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
			
 
				-		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			
 
				-			return EINVAL;
			
 
				-	}
			
 
				-
			
 
				-	/* Reciprocal target->temp btree format checks */
			
 
				-	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
			
 
				-		if (XFS_IFORK_BOFF(tip) &&
			
 
				-		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
			
 
				-			return EINVAL;
			
 
				-		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
			
 
				-		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			
 
				-			return EINVAL;
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static int
			
 
				-xfs_swap_extents(
			
 
				-	xfs_inode_t	*ip,	/* target inode */
			
 
				-	xfs_inode_t	*tip,	/* tmp inode */
			
 
				-	xfs_swapext_t	*sxp)
			
 
				-{
			
 
				-	xfs_mount_t	*mp = ip->i_mount;
			
 
				-	xfs_trans_t	*tp;
			
 
				-	xfs_bstat_t	*sbp = &sxp->sx_stat;
			
 
				-	xfs_ifork_t	*tempifp, *ifp, *tifp;
			
 
				-	int		src_log_flags, target_log_flags;
			
 
				-	int		error = 0;
			
 
				-	int		aforkblks = 0;
			
 
				-	int		taforkblks = 0;
			
 
				-	__uint64_t	tmp;
			
 
				-
			
 
				-	/*
			
 
				-	 * We have no way of updating owner information in the BMBT blocks for
			
 
				-	 * each inode on CRC enabled filesystems, so to avoid corrupting the
			
 
				-	 * this metadata we simply don't allow extent swaps to occur.
			
 
				-	 */
			
 
				-	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-
			
 
				-	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
			
 
				-	if (!tempifp) {
			
 
				-		error = XFS_ERROR(ENOMEM);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * we have to do two separate lock calls here to keep lockdep
			
 
				-	 * happy. If we try to get all the locks in one call, lock will
			
 
				-	 * report false positives when we drop the ILOCK and regain them
			
 
				-	 * below.
			
 
				-	 */
			
 
				-	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
			
 
				-	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/* Verify that both files have the same format */
			
 
				-	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	/* Verify both files are either real-time or non-realtime */
			
 
				-	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
			
 
				-	if (error)
			
 
				-		goto out_unlock;
			
 
				-	truncate_pagecache_range(VFS_I(tip), 0, -1);
			
 
				-
			
 
				-	/* Verify O_DIRECT for ftmp */
			
 
				-	if (VN_CACHED(VFS_I(tip)) != 0) {
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	/* Verify all data are being swapped */
			
 
				-	if (sxp->sx_offset != 0 ||
			
 
				-	    sxp->sx_length != ip->i_d.di_size ||
			
 
				-	    sxp->sx_length != tip->i_d.di_size) {
			
 
				-		error = XFS_ERROR(EFAULT);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	trace_xfs_swap_extent_before(ip, 0);
			
 
				-	trace_xfs_swap_extent_before(tip, 1);
			
 
				-
			
 
				-	/* check inode formats now that data is flushed */
			
 
				-	error = xfs_swap_extents_check_format(ip, tip);
			
 
				-	if (error) {
			
 
				-		xfs_notice(mp,
			
 
				-		    "%s: inode 0x%llx format is incompatible for exchanging.",
			
 
				-				__func__, ip->i_ino);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Compare the current change & modify times with that
			
 
				-	 * passed in.  If they differ, we abort this swap.
			
 
				-	 * This is the mechanism used to ensure the calling
			
 
				-	 * process that the file was not changed out from
			
 
				-	 * under it.
			
 
				-	 */
			
 
				-	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
			
 
				-	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
			
 
				-	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
			
 
				-	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
			
 
				-		error = XFS_ERROR(EBUSY);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	/* We need to fail if the file is memory mapped.  Once we have tossed
			
 
				-	 * all existing pages, the page fault will have no option
			
 
				-	 * but to go to the filesystem for pages. By making the page fault call
			
 
				-	 * vop_read (or write in the case of autogrow) they block on the iolock
			
 
				-	 * until we have switched the extents.
			
 
				-	 */
			
 
				-	if (VN_MAPPED(VFS_I(ip))) {
			
 
				-		error = XFS_ERROR(EBUSY);
			
 
				-		goto out_unlock;
			
 
				-	}
			
 
				-
			
 
				-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
 
				-	xfs_iunlock(tip, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * There is a race condition here since we gave up the
			
 
				-	 * ilock.  However, the data fork will not change since
			
 
				-	 * we have the iolock (locked for truncation too) so we
			
 
				-	 * are safe.  We don't really care if non-io related
			
 
				-	 * fields change.
			
 
				-	 */
			
 
				-	truncate_pagecache_range(VFS_I(ip), 0, -1);
			
 
				-
			
 
				-	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
			
 
				-	if ((error = xfs_trans_reserve(tp, 0,
			
 
				-				     XFS_ICHANGE_LOG_RES(mp), 0,
			
 
				-				     0, 0))) {
			
 
				-		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
			
 
				-		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
			
 
				-		xfs_trans_cancel(tp, 0);
			
 
				-		goto out;
			
 
				-	}
			
 
				-	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * Count the number of extended attribute blocks
			
 
				-	 */
			
 
				-	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
			
 
				-	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
			
 
				-		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
			
 
				-		if (error)
			
 
				-			goto out_trans_cancel;
			
 
				-	}
			
 
				-	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
			
 
				-	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
			
 
				-		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			
 
				-			&taforkblks);
			
 
				-		if (error)
			
 
				-			goto out_trans_cancel;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Swap the data forks of the inodes
			
 
				-	 */
			
 
				-	ifp = &ip->i_df;
			
 
				-	tifp = &tip->i_df;
			
 
				-	*tempifp = *ifp;	/* struct copy */
			
 
				-	*ifp = *tifp;		/* struct copy */
			
 
				-	*tifp = *tempifp;	/* struct copy */
			
 
				-
			
 
				-	/*
			
 
				-	 * Fix the on-disk inode values
			
 
				-	 */
			
 
				-	tmp = (__uint64_t)ip->i_d.di_nblocks;
			
 
				-	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
			
 
				-	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
			
 
				-
			
 
				-	tmp = (__uint64_t) ip->i_d.di_nextents;
			
 
				-	ip->i_d.di_nextents = tip->i_d.di_nextents;
			
 
				-	tip->i_d.di_nextents = tmp;
			
 
				-
			
 
				-	tmp = (__uint64_t) ip->i_d.di_format;
			
 
				-	ip->i_d.di_format = tip->i_d.di_format;
			
 
				-	tip->i_d.di_format = tmp;
			
 
				-
			
 
				-	/*
			
 
				-	 * The extents in the source inode could still contain speculative
			
 
				-	 * preallocation beyond EOF (e.g. the file is open but not modified
			
 
				-	 * while defrag is in progress). In that case, we need to copy over the
			
 
				-	 * number of delalloc blocks the data fork in the source inode is
			
 
				-	 * tracking beyond EOF so that when the fork is truncated away when the
			
 
				-	 * temporary inode is unlinked we don't underrun the i_delayed_blks
			
 
				-	 * counter on that inode.
			
 
				-	 */
			
 
				-	ASSERT(tip->i_delayed_blks == 0);
			
 
				-	tip->i_delayed_blks = ip->i_delayed_blks;
			
 
				-	ip->i_delayed_blks = 0;
			
 
				-
			
 
				-	src_log_flags = XFS_ILOG_CORE;
			
 
				-	switch (ip->i_d.di_format) {
			
 
				-	case XFS_DINODE_FMT_EXTENTS:
			
 
				-		/* If the extents fit in the inode, fix the
			
 
				-		 * pointer.  Otherwise it's already NULL or
			
 
				-		 * pointing to the extent.
			
 
				-		 */
			
 
				-		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			
 
				-			ifp->if_u1.if_extents =
			
 
				-				ifp->if_u2.if_inline_ext;
			
 
				-		}
			
 
				-		src_log_flags |= XFS_ILOG_DEXT;
			
 
				-		break;
			
 
				-	case XFS_DINODE_FMT_BTREE:
			
 
				-		src_log_flags |= XFS_ILOG_DBROOT;
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-	target_log_flags = XFS_ILOG_CORE;
			
 
				-	switch (tip->i_d.di_format) {
			
 
				-	case XFS_DINODE_FMT_EXTENTS:
			
 
				-		/* If the extents fit in the inode, fix the
			
 
				-		 * pointer.  Otherwise it's already NULL or
			
 
				-		 * pointing to the extent.
			
 
				-		 */
			
 
				-		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			
 
				-			tifp->if_u1.if_extents =
			
 
				-				tifp->if_u2.if_inline_ext;
			
 
				-		}
			
 
				-		target_log_flags |= XFS_ILOG_DEXT;
			
 
				-		break;
			
 
				-	case XFS_DINODE_FMT_BTREE:
			
 
				-		target_log_flags |= XFS_ILOG_DBROOT;
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-
			
 
				-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				-	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				-
			
 
				-	xfs_trans_log_inode(tp, ip,  src_log_flags);
			
 
				-	xfs_trans_log_inode(tp, tip, target_log_flags);
			
 
				-
			
 
				-	/*
			
 
				-	 * If this is a synchronous mount, make sure that the
			
 
				-	 * transaction goes to disk before returning to the user.
			
 
				-	 */
			
 
				-	if (mp->m_flags & XFS_MOUNT_WSYNC)
			
 
				-		xfs_trans_set_sync(tp);
			
 
				-
			
 
				-	error = xfs_trans_commit(tp, 0);
			
 
				-
			
 
				-	trace_xfs_swap_extent_after(ip, 0);
			
 
				-	trace_xfs_swap_extent_after(tip, 1);
			
 
				-out:
			
 
				-	kmem_free(tempifp);
			
 
				-	return error;
			
 
				-
			
 
				-out_unlock:
			
 
				-	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				-	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
			
 
				-	goto out;
			
 
				-
			
 
				-out_trans_cancel:
			
 
				-	xfs_trans_cancel(tp, 0);
			
 
				-	goto out_unlock;
			
 
				-}
			
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -1,53 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
			
 
				- * All Rights Reserved.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or
			
 
				- * modify it under the terms of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it would be useful,
			
 
				- * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				- * GNU General Public License for more details.
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public License
			
 
				- * along with this program; if not, write the Free Software Foundation,
			
 
				- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				- */
			
 
				-#ifndef __XFS_DFRAG_H__
			
 
				-#define	__XFS_DFRAG_H__
			
 
				-
			
 
				-/*
			
 
				- * Structure passed to xfs_swapext
			
 
				- */
			
 
				-
			
 
				-typedef struct xfs_swapext
			
 
				-{
			
 
				-	__int64_t	sx_version;	/* version */
			
 
				-	__int64_t	sx_fdtarget;	/* fd of target file */
			
 
				-	__int64_t	sx_fdtmp;	/* fd of tmp file */
			
 
				-	xfs_off_t	sx_offset;	/* offset into file */
			
 
				-	xfs_off_t	sx_length;	/* leng from offset */
			
 
				-	char		sx_pad[16];	/* pad space, unused */
			
 
				-	xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
			
 
				-} xfs_swapext_t;
			
 
				-
			
 
				-/*
			
 
				- * Version flag
			
 
				- */
			
 
				-#define XFS_SX_VERSION		0
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				-/*
			
 
				- * Prototypes for visible xfs_dfrag.c routines.
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * Syscall interface for xfs_swapext
			
 
				- */
			
 
				-int	xfs_swapext(struct xfs_swapext *sx);
			
 
				-
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				-#endif	/* __XFS_DFRAG_H__ */
			
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -31,14 +31,14 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_bmap.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_trace.h"
			
 
				 
			
 
				-struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2};
			
 
				+struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
			
 
				+
			
 
				 
			
 
				 /*
			
 
				  * ASCII case-insensitive (ie. A-Z) support for directories that was
			
@@ -90,6 +90,9 @@ void
 
				 xfs_dir_mount(
			
 
				 	xfs_mount_t	*mp)
			
 
				 {
			
 
				+	int	nodehdr_size;
			
 
				+
			
 
				+
			
 
				 	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
			
 
				 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
			
 
				 	       XFS_MAX_BLOCKSIZE);
			
@@ -98,12 +101,13 @@ xfs_dir_mount(
 
				 	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
			
 
				 	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
			
 
				 	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
			
 
				-	mp->m_attr_node_ents =
			
 
				-		(mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
			
 
				-		(uint)sizeof(xfs_da_node_entry_t);
			
 
				-	mp->m_dir_node_ents =
			
 
				-		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
			
 
				-		(uint)sizeof(xfs_da_node_entry_t);
			
 
				+
			
 
				+	nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
			
 
				+	mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) /
			
 
				+				(uint)sizeof(xfs_da_node_entry_t);
			
 
				+	mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) /
			
 
				+				(uint)sizeof(xfs_da_node_entry_t);
			
 
				+
			
 
				 	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
			
 
				 	if (xfs_sb_version_hasasciici(&mp->m_sb))
			
 
				 		mp->m_dirnameops = &xfs_ascii_ci_nameops;
			
@@ -209,6 +213,7 @@ xfs_dir_createname(
 
				 	memset(&args, 0, sizeof(xfs_da_args_t));
			
 
				 	args.name = name->name;
			
 
				 	args.namelen = name->len;
			
 
				+	args.filetype = name->type;
			
 
				 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
			
 
				 	args.inumber = inum;
			
 
				 	args.dp = dp;
			
@@ -283,6 +288,7 @@ xfs_dir_lookup(
 
				 	memset(&args, 0, sizeof(xfs_da_args_t));
			
 
				 	args.name = name->name;
			
 
				 	args.namelen = name->len;
			
 
				+	args.filetype = name->type;
			
 
				 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
			
 
				 	args.dp = dp;
			
 
				 	args.whichfork = XFS_DATA_FORK;
			
@@ -338,6 +344,7 @@ xfs_dir_removename(
 
				 	memset(&args, 0, sizeof(xfs_da_args_t));
			
 
				 	args.name = name->name;
			
 
				 	args.namelen = name->len;
			
 
				+	args.filetype = name->type;
			
 
				 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
			
 
				 	args.inumber = ino;
			
 
				 	args.dp = dp;
			
@@ -362,37 +369,6 @@ xfs_dir_removename(
 
				 	return rval;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Read a directory.
			
 
				- */
			
 
				-int
			
 
				-xfs_readdir(
			
 
				-	xfs_inode_t	*dp,
			
 
				-	struct dir_context *ctx,
			
 
				-	size_t		bufsize)
			
 
				-{
			
 
				-	int		rval;		/* return value */
			
 
				-	int		v;		/* type-checking value */
			
 
				-
			
 
				-	trace_xfs_readdir(dp);
			
 
				-
			
 
				-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
			
 
				-		return XFS_ERROR(EIO);
			
 
				-
			
 
				-	ASSERT(S_ISDIR(dp->i_d.di_mode));
			
 
				-	XFS_STATS_INC(xs_dir_getdents);
			
 
				-
			
 
				-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
			
 
				-		rval = xfs_dir2_sf_getdents(dp, ctx);
			
 
				-	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
			
 
				-		;
			
 
				-	else if (v)
			
 
				-		rval = xfs_dir2_block_getdents(dp, ctx);
			
 
				-	else
			
 
				-		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
			
 
				-	return rval;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Replace the inode number of a directory entry.
			
 
				  */
			
@@ -418,6 +394,7 @@ xfs_dir_replace(
 
				 	memset(&args, 0, sizeof(xfs_da_args_t));
			
 
				 	args.name = name->name;
			
 
				 	args.namelen = name->len;
			
 
				+	args.filetype = name->type;
			
 
				 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
			
 
				 	args.inumber = inum;
			
 
				 	args.dp = dp;
			
@@ -465,6 +442,7 @@ xfs_dir_canenter(
 
				 	memset(&args, 0, sizeof(xfs_da_args_t));
			
 
				 	args.name = name->name;
			
 
				 	args.namelen = name->len;
			
 
				+	args.filetype = name->type;
			
 
				 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
			
 
				 	args.dp = dp;
			
 
				 	args.whichfork = XFS_DATA_FORK;
			
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -23,6 +23,11 @@ struct xfs_da_args;
 
				 struct xfs_inode;
			
 
				 struct xfs_mount;
			
 
				 struct xfs_trans;
			
 
				+struct xfs_dir2_sf_hdr;
			
 
				+struct xfs_dir2_sf_entry;
			
 
				+struct xfs_dir2_data_hdr;
			
 
				+struct xfs_dir2_data_entry;
			
 
				+struct xfs_dir2_data_unused;
			
 
				 
			
 
				 extern struct xfs_name	xfs_name_dotdot;
			
 
				 
			
@@ -57,4 +62,45 @@ extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
 
				  */
			
 
				 extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
			
 
				 
			
 
				+/*
			
 
				+ * Interface routines used by userspace utilities
			
 
				+ */
			
 
				+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
			
 
				+extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp,
			
 
				+		xfs_ino_t ino);
			
 
				+extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp,
			
 
				+		struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep);
			
 
				+extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp,
			
 
				+		struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep,
			
 
				+		xfs_ino_t ino);
			
 
				+
			
 
				+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
			
 
				+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
			
 
				+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
			
 
				+				struct xfs_buf *bp);
			
 
				+
			
 
				+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
			
 
				+		struct xfs_dir2_data_hdr *hdr, int *loghead);
			
 
				+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+		struct xfs_dir2_data_entry *dep);
			
 
				+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
			
 
				+		struct xfs_buf *bp);
			
 
				+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+		struct xfs_dir2_data_unused *dup);
			
 
				+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
			
 
				+		int *needlogp, int *needscanp);
			
 
				+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
			
 
				+		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
			
 
				+
			
 
				+extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
			
 
				+		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup);
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
			
 
				+
			
 
				 #endif	/* __XFS_DIR2_H__ */
			
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -31,8 +31,8 @@
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_bmap.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
@@ -126,7 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
 
				 	.verify_write = xfs_dir3_block_write_verify,
			
 
				 };
			
 
				 
			
 
				-static int
			
 
				+int
			
 
				 xfs_dir3_block_read(
			
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_inode	*dp,
			
@@ -369,7 +369,7 @@ xfs_dir2_block_addname(
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	len = xfs_dir2_data_entsize(args->namelen);
			
 
				+	len = xfs_dir3_data_entsize(mp, args->namelen);
			
 
				 
			
 
				 	/*
			
 
				 	 * Set up pointers to parts of the block.
			
@@ -549,7 +549,8 @@ xfs_dir2_block_addname(
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				 	dep->namelen = args->namelen;
			
 
				 	memcpy(dep->name, args->name, args->namelen);
			
 
				-	tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
			
 
				+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 	/*
			
 
				 	 * Clean up the bestfree array and log the header, tail, and entry.
			
@@ -564,101 +565,6 @@ xfs_dir2_block_addname(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Readdir for block directories.
			
 
				- */
			
 
				-int						/* error */
			
 
				-xfs_dir2_block_getdents(
			
 
				-	xfs_inode_t		*dp,		/* incore inode */
			
 
				-	struct dir_context	*ctx)
			
 
				-{
			
 
				-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
			
 
				-	struct xfs_buf		*bp;		/* buffer for block */
			
 
				-	xfs_dir2_block_tail_t	*btp;		/* block tail */
			
 
				-	xfs_dir2_data_entry_t	*dep;		/* block data entry */
			
 
				-	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
			
 
				-	char			*endptr;	/* end of the data entries */
			
 
				-	int			error;		/* error return value */
			
 
				-	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				-	char			*ptr;		/* current data entry */
			
 
				-	int			wantoff;	/* starting block offset */
			
 
				-	xfs_off_t		cook;
			
 
				-
			
 
				-	mp = dp->i_mount;
			
 
				-	/*
			
 
				-	 * If the block number in the offset is out of range, we're done.
			
 
				-	 */
			
 
				-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
			
 
				-		return 0;
			
 
				-
			
 
				-	error = xfs_dir3_block_read(NULL, dp, &bp);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Extract the byte offset we start at from the seek pointer.
			
 
				-	 * We'll skip entries before this.
			
 
				-	 */
			
 
				-	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
			
 
				-	hdr = bp->b_addr;
			
 
				-	xfs_dir3_data_check(dp, bp);
			
 
				-	/*
			
 
				-	 * Set up values for the loop.
			
 
				-	 */
			
 
				-	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				-	ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				-	endptr = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				-
			
 
				-	/*
			
 
				-	 * Loop over the data portion of the block.
			
 
				-	 * Each object is a real entry (dep) or an unused one (dup).
			
 
				-	 */
			
 
				-	while (ptr < endptr) {
			
 
				-		dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				-		/*
			
 
				-		 * Unused, skip it.
			
 
				-		 */
			
 
				-		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			
 
				-			ptr += be16_to_cpu(dup->length);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				-
			
 
				-		/*
			
 
				-		 * Bump pointer for the next iteration.
			
 
				-		 */
			
 
				-		ptr += xfs_dir2_data_entsize(dep->namelen);
			
 
				-		/*
			
 
				-		 * The entry is before the desired starting point, skip it.
			
 
				-		 */
			
 
				-		if ((char *)dep - (char *)hdr < wantoff)
			
 
				-			continue;
			
 
				-
			
 
				-		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-					    (char *)dep - (char *)hdr);
			
 
				-
			
 
				-		ctx->pos = cook & 0x7fffffff;
			
 
				-		/*
			
 
				-		 * If it didn't fit, set the final offset to here & return.
			
 
				-		 */
			
 
				-		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			
 
				-			    be64_to_cpu(dep->inumber), DT_UNKNOWN)) {
			
 
				-			xfs_trans_brelse(NULL, bp);
			
 
				-			return 0;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Reached the end of the block.
			
 
				-	 * Set the offset to a non-existent block 1 and return.
			
 
				-	 */
			
 
				-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
			
 
				-			0x7fffffff;
			
 
				-	xfs_trans_brelse(NULL, bp);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Log leaf entries from the block.
			
 
				  */
			
@@ -736,6 +642,7 @@ xfs_dir2_block_lookup(
 
				 	 * Fill in inode number, CI name if appropriate, release the block.
			
 
				 	 */
			
 
				 	args->inumber = be64_to_cpu(dep->inumber);
			
 
				+	args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
			
 
				 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
			
 
				 	xfs_trans_brelse(args->trans, bp);
			
 
				 	return XFS_ERROR(error);
			
@@ -894,7 +801,7 @@ xfs_dir2_block_removename(
 
				 	needlog = needscan = 0;
			
 
				 	xfs_dir2_data_make_free(tp, bp,
			
 
				 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
			
 
				-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
			
 
				+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
			
 
				 	/*
			
 
				 	 * Fix up the block tail.
			
 
				 	 */
			
@@ -968,6 +875,7 @@ xfs_dir2_block_replace(
 
				 	 * Change the inode number to the new value.
			
 
				 	 */
			
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
			
 
				 	xfs_dir2_data_log_entry(args->trans, bp, dep);
			
 
				 	xfs_dir3_data_check(dp, bp);
			
 
				 	return 0;
			
@@ -1254,7 +1162,8 @@ xfs_dir2_sf_to_block(
 
				 	dep->inumber = cpu_to_be64(dp->i_ino);
			
 
				 	dep->namelen = 1;
			
 
				 	dep->name[0] = '.';
			
 
				-	tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
			
 
				+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 	xfs_dir2_data_log_entry(tp, bp, dep);
			
 
				 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
			
@@ -1267,7 +1176,8 @@ xfs_dir2_sf_to_block(
 
				 	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
			
 
				 	dep->namelen = 2;
			
 
				 	dep->name[0] = dep->name[1] = '.';
			
 
				-	tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
			
 
				+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 	xfs_dir2_data_log_entry(tp, bp, dep);
			
 
				 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
			
@@ -1312,10 +1222,12 @@ xfs_dir2_sf_to_block(
 
				 		 * Copy a real entry.
			
 
				 		 */
			
 
				 		dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
			
 
				-		dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
			
 
				+		dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep));
			
 
				 		dep->namelen = sfep->namelen;
			
 
				+		xfs_dir3_dirent_put_ftype(mp, dep,
			
 
				+					xfs_dir3_sfe_get_ftype(mp, sfp, sfep));
			
 
				 		memcpy(dep->name, sfep->name, dep->namelen);
			
 
				-		tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+		tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 		*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 		xfs_dir2_data_log_entry(tp, bp, dep);
			
 
				 		name.name = sfep->name;
			
@@ -1328,7 +1240,7 @@ xfs_dir2_sf_to_block(
 
				 		if (++i == sfp->count)
			
 
				 			sfep = NULL;
			
 
				 		else
			
 
				-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				 	}
			
 
				 	/* Done with the temporary buffer */
			
 
				 	kmem_free(sfp);
			
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -29,14 +29,12 @@
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				 #include "xfs_cksum.h"
			
 
				 
			
 
				-STATIC xfs_dir2_data_free_t *
			
 
				-xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
			
 
				-
			
 
				 /*
			
 
				  * Check the consistency of the data block.
			
 
				  * The input can also be a block-format directory.
			
@@ -149,8 +147,10 @@ __xfs_dir3_data_check(
 
				 		XFS_WANT_CORRUPTED_RETURN(
			
 
				 			!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
			
 
				 		XFS_WANT_CORRUPTED_RETURN(
			
 
				-			be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
			
 
				+			be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) ==
			
 
				 					       (char *)dep - (char *)hdr);
			
 
				+		XFS_WANT_CORRUPTED_RETURN(
			
 
				+			xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX);
			
 
				 		count++;
			
 
				 		lastfree = 0;
			
 
				 		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
@@ -168,7 +168,7 @@ __xfs_dir3_data_check(
 
				 			}
			
 
				 			XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
			
 
				 		}
			
 
				-		p += xfs_dir2_data_entsize(dep->namelen);
			
 
				+		p += xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				 	}
			
 
				 	/*
			
 
				 	 * Need to have seen all the entries and all the bestfree slots.
			
@@ -325,7 +325,7 @@ xfs_dir3_data_readahead(
 
				  * Given a data block and an unused entry from that block,
			
 
				  * return the bestfree entry if any that corresponds to it.
			
 
				  */
			
 
				-STATIC xfs_dir2_data_free_t *
			
 
				+xfs_dir2_data_free_t *
			
 
				 xfs_dir2_data_freefind(
			
 
				 	xfs_dir2_data_hdr_t	*hdr,		/* data block */
			
 
				 	xfs_dir2_data_unused_t	*dup)		/* data unused entry */
			
@@ -333,7 +333,7 @@ xfs_dir2_data_freefind(
 
				 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
			
 
				 	xfs_dir2_data_aoff_t	off;		/* offset value needed */
			
 
				 	struct xfs_dir2_data_free *bf;
			
 
				-#if defined(DEBUG) && defined(__KERNEL__)
			
 
				+#ifdef DEBUG
			
 
				 	int			matched;	/* matched the value */
			
 
				 	int			seenzero;	/* saw a 0 bestfree entry */
			
 
				 #endif
			
@@ -341,7 +341,7 @@ xfs_dir2_data_freefind(
 
				 	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
			
 
				 	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				 
			
 
				-#if defined(DEBUG) && defined(__KERNEL__)
			
 
				+#ifdef DEBUG
			
 
				 	/*
			
 
				 	 * Validate some consistency in the bestfree table.
			
 
				 	 * Check order, non-overlapping entries, and if we find the
			
@@ -538,8 +538,8 @@ xfs_dir2_data_freescan(
 
				 		else {
			
 
				 			dep = (xfs_dir2_data_entry_t *)p;
			
 
				 			ASSERT((char *)dep - (char *)hdr ==
			
 
				-			       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
			
 
				-			p += xfs_dir2_data_entsize(dep->namelen);
			
 
				+			       be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)));
			
 
				+			p += xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -629,7 +629,8 @@ xfs_dir2_data_log_entry(
 
				 	struct xfs_buf		*bp,
			
 
				 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
			
 
				 {
			
 
				-	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
			
 
				+	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
			
 
				+	struct xfs_mount	*mp = tp->t_mountp;
			
 
				 
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
@@ -637,7 +638,7 @@ xfs_dir2_data_log_entry(
 
				 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 
			
 
				 	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
			
 
				-		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
			
 
				+		(uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) -
			
 
				 		       (char *)hdr - 1));
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -68,6 +68,23 @@
 
				 #define	XFS_DIR3_DATA_MAGIC	0x58444433	/* XDD3: multiblock dirs */
			
 
				 #define	XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
			
 
				 
			
 
				+/*
			
 
				+ * Dirents in version 3 directories have a file type field. Additions to this
			
 
				+ * list are an on-disk format change, requiring feature bits. Valid values
			
 
				+ * are as follows:
			
 
				+ */
			
 
				+#define XFS_DIR3_FT_UNKNOWN		0
			
 
				+#define XFS_DIR3_FT_REG_FILE		1
			
 
				+#define XFS_DIR3_FT_DIR			2
			
 
				+#define XFS_DIR3_FT_CHRDEV		3
			
 
				+#define XFS_DIR3_FT_BLKDEV		4
			
 
				+#define XFS_DIR3_FT_FIFO		5
			
 
				+#define XFS_DIR3_FT_SOCK		6
			
 
				+#define XFS_DIR3_FT_SYMLINK		7
			
 
				+#define XFS_DIR3_FT_WHT			8
			
 
				+
			
 
				+#define XFS_DIR3_FT_MAX			9
			
 
				+
			
 
				 /*
			
 
				  * Byte offset in data block and shortform entry.
			
 
				  */
			
@@ -138,6 +155,9 @@ typedef struct xfs_dir2_sf_entry {
 
				 	xfs_dir2_sf_off_t	offset;		/* saved offset */
			
 
				 	__u8			name[];		/* name, variable size */
			
 
				 	/*
			
 
				+	 * A single byte containing the file type field follows the inode
			
 
				+	 * number for version 3 directory entries.
			
 
				+	 *
			
 
				 	 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
			
 
				 	 * variable offset after the name.
			
 
				 	 */
			
@@ -162,16 +182,6 @@ xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
 
				 	put_unaligned_be16(off, &sfep->offset.i);
			
 
				 }
			
 
				 
			
 
				-static inline int
			
 
				-xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
			
 
				-{
			
 
				-	return sizeof(struct xfs_dir2_sf_entry) +	/* namelen + offset */
			
 
				-		len +					/* name */
			
 
				-		(hdr->i8count ?				/* ino */
			
 
				-		 sizeof(xfs_dir2_ino8_t) :
			
 
				-		 sizeof(xfs_dir2_ino4_t));
			
 
				-}
			
 
				-
			
 
				 static inline struct xfs_dir2_sf_entry *
			
 
				 xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
			
 
				 {
			
@@ -179,14 +189,78 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
 
				 		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
			
 
				 }
			
 
				 
			
 
				+static inline int
			
 
				+xfs_dir3_sf_entsize(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_sf_hdr	*hdr,
			
 
				+	int			len)
			
 
				+{
			
 
				+	int count = sizeof(struct xfs_dir2_sf_entry); 	/* namelen + offset */
			
 
				+
			
 
				+	count += len;					/* name */
			
 
				+	count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
			
 
				+				sizeof(xfs_dir2_ino4_t); /* ino # */
			
 
				+	if (xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		count += sizeof(__uint8_t);		/* file type */
			
 
				+	return count;
			
 
				+}
			
 
				+
			
 
				 static inline struct xfs_dir2_sf_entry *
			
 
				-xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
			
 
				-		struct xfs_dir2_sf_entry *sfep)
			
 
				+xfs_dir3_sf_nextentry(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_sf_hdr	*hdr,
			
 
				+	struct xfs_dir2_sf_entry *sfep)
			
 
				 {
			
 
				 	return (struct xfs_dir2_sf_entry *)
			
 
				-		((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
			
 
				+		((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen));
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * In dir3 shortform directories, the file type field is stored at a variable
			
 
				+ * offset, directly after the name. Because it's only a single byte, endian
			
 
				+ * conversion is not necessary.
			
 
				+ */
			
 
				+static inline __uint8_t *
			
 
				+xfs_dir3_sfe_ftypep(
			
 
				+	struct xfs_dir2_sf_hdr	*hdr,
			
 
				+	struct xfs_dir2_sf_entry *sfep)
			
 
				+{
			
 
				+	return (__uint8_t *)&sfep->name[sfep->namelen];
			
 
				+}
			
 
				+
			
 
				+static inline __uint8_t
			
 
				+xfs_dir3_sfe_get_ftype(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_sf_hdr	*hdr,
			
 
				+	struct xfs_dir2_sf_entry *sfep)
			
 
				+{
			
 
				+	__uint8_t	*ftp;
			
 
				+
			
 
				+	if (!xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		return XFS_DIR3_FT_UNKNOWN;
			
 
				+
			
 
				+	ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
			
 
				+	if (*ftp >= XFS_DIR3_FT_MAX)
			
 
				+		return XFS_DIR3_FT_UNKNOWN;
			
 
				+	return *ftp;
			
 
				+}
			
 
				+
			
 
				+static inline void
			
 
				+xfs_dir3_sfe_put_ftype(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_sf_hdr	*hdr,
			
 
				+	struct xfs_dir2_sf_entry *sfep,
			
 
				+	__uint8_t		ftype)
			
 
				+{
			
 
				+	__uint8_t	*ftp;
			
 
				+
			
 
				+	ASSERT(ftype < XFS_DIR3_FT_MAX);
			
 
				+
			
 
				+	if (!xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		return;
			
 
				+	ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
			
 
				+	*ftp = ftype;
			
 
				+}
			
 
				 
			
 
				 /*
			
 
				  * Data block structures.
			
@@ -286,12 +360,18 @@ xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
 
				  * Active entry in a data block.
			
 
				  *
			
 
				  * Aligned to 8 bytes.  After the variable length name field there is a
			
 
				- * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
			
 
				+ * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
			
 
				+ *
			
 
				+ * For dir3 structures, there is a file type field between the name and the tag.
			
 
				+ * This can only be manipulated by helper functions. It is packed hard against
			
 
				+ * the end of the name so any padding for rounding is between the file type and
			
 
				+ * the tag.
			
 
				  */
			
 
				 typedef struct xfs_dir2_data_entry {
			
 
				 	__be64			inumber;	/* inode number */
			
 
				 	__u8			namelen;	/* name length */
			
 
				 	__u8			name[];		/* name bytes, no null */
			
 
				+     /* __u8			filetype; */	/* type of inode we point to */
			
 
				      /*	__be16                  tag; */		/* starting offset of us */
			
 
				 } xfs_dir2_data_entry_t;
			
 
				 
			
@@ -311,20 +391,67 @@ typedef struct xfs_dir2_data_unused {
 
				 /*
			
 
				  * Size of a data entry.
			
 
				  */
			
 
				-static inline int xfs_dir2_data_entsize(int n)
			
 
				+static inline int
			
 
				+__xfs_dir3_data_entsize(
			
 
				+	bool	ftype,
			
 
				+	int	n)
			
 
				 {
			
 
				-	return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
			
 
				-		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
			
 
				+	int	size = offsetof(struct xfs_dir2_data_entry, name[0]);
			
 
				+
			
 
				+	size += n;
			
 
				+	size += sizeof(xfs_dir2_data_off_t);
			
 
				+	if (ftype)
			
 
				+		size += sizeof(__uint8_t);
			
 
				+	return roundup(size, XFS_DIR2_DATA_ALIGN);
			
 
				+}
			
 
				+static inline int
			
 
				+xfs_dir3_data_entsize(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	int			n)
			
 
				+{
			
 
				+	bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false;
			
 
				+	return __xfs_dir3_data_entsize(ftype, n);
			
 
				+}
			
 
				+
			
 
				+static inline __uint8_t
			
 
				+xfs_dir3_dirent_get_ftype(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_data_entry *dep)
			
 
				+{
			
 
				+	if (xfs_sb_version_hasftype(&mp->m_sb)) {
			
 
				+		__uint8_t	type = dep->name[dep->namelen];
			
 
				+
			
 
				+		ASSERT(type < XFS_DIR3_FT_MAX);
			
 
				+		if (type < XFS_DIR3_FT_MAX)
			
 
				+			return type;
			
 
				+
			
 
				+	}
			
 
				+	return XFS_DIR3_FT_UNKNOWN;
			
 
				+}
			
 
				+
			
 
				+static inline void
			
 
				+xfs_dir3_dirent_put_ftype(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_data_entry *dep,
			
 
				+	__uint8_t		type)
			
 
				+{
			
 
				+	ASSERT(type < XFS_DIR3_FT_MAX);
			
 
				+	ASSERT(dep->namelen != 0);
			
 
				+
			
 
				+	if (xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		dep->name[dep->namelen] = type;
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * Pointer to an entry's tag word.
			
 
				  */
			
 
				 static inline __be16 *
			
 
				-xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
			
 
				+xfs_dir3_data_entry_tag_p(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dir2_data_entry *dep)
			
 
				 {
			
 
				 	return (__be16 *)((char *)dep +
			
 
				-		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
			
 
				+		xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -375,13 +502,17 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
 
				  * data block header because the sfe embeds the block offset of the entry into
			
 
				  * it so that it doesn't change when format conversion occurs. Bad Things Happen
			
 
				  * if we don't follow this rule.
			
 
				+ *
			
 
				+ * XXX: there is scope for significant optimisation of the logic here. Right
			
 
				+ * now we are checking for "dir3 format" over and over again. Ideally we should
			
 
				+ * only do it once for each operation.
			
 
				  */
			
 
				 #define	XFS_DIR3_DATA_DOT_OFFSET(mp)	\
			
 
				 	xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
			
 
				 #define	XFS_DIR3_DATA_DOTDOT_OFFSET(mp)	\
			
 
				-	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir2_data_entsize(1))
			
 
				+	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1))
			
 
				 #define	XFS_DIR3_DATA_FIRST_OFFSET(mp)		\
			
 
				-	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir2_data_entsize(2))
			
 
				+	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2))
			
 
				 
			
 
				 static inline xfs_dir2_data_aoff_t
			
 
				 xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
			
@@ -392,13 +523,19 @@ xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
 
				 static inline xfs_dir2_data_aoff_t
			
 
				 xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				 {
			
 
				-	return xfs_dir3_data_dot_offset(hdr) + xfs_dir2_data_entsize(1);
			
 
				+	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
			
 
				+	return xfs_dir3_data_dot_offset(hdr) +
			
 
				+		__xfs_dir3_data_entsize(dir3, 1);
			
 
				 }
			
 
				 
			
 
				 static inline xfs_dir2_data_aoff_t
			
 
				 xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				 {
			
 
				-	return xfs_dir3_data_dotdot_offset(hdr) + xfs_dir2_data_entsize(2);
			
 
				+	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
			
 
				+	return xfs_dir3_data_dotdot_offset(hdr) +
			
 
				+		__xfs_dir3_data_entsize(dir3, 2);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -519,6 +656,9 @@ struct xfs_dir3_leaf {
 
				 
			
 
				 #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
			
 
				 
			
 
				+extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
			
 
				+					struct xfs_dir2_leaf *from);
			
 
				+
			
 
				 static inline int
			
 
				 xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
			
 
				 {
			
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -31,6 +31,7 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_bmap.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
@@ -695,7 +696,7 @@ xfs_dir2_leaf_addname(
 
				 	ents = xfs_dir3_leaf_ents_p(leaf);
			
 
				 	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				 	bestsp = xfs_dir2_leaf_bests_p(ltp);
			
 
				-	length = xfs_dir2_data_entsize(args->namelen);
			
 
				+	length = xfs_dir3_data_entsize(mp, args->namelen);
			
 
				 
			
 
				 	/*
			
 
				 	 * See if there are any entries with the same hash value
			
@@ -896,7 +897,8 @@ xfs_dir2_leaf_addname(
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				 	dep->namelen = args->namelen;
			
 
				 	memcpy(dep->name, args->name, dep->namelen);
			
 
				-	tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
			
 
				+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 	/*
			
 
				 	 * Need to scan fix up the bestfree table.
			
@@ -1083,396 +1085,6 @@ xfs_dir3_leaf_compact_x1(
 
				 	*highstalep = highstale;
			
 
				 }
			
 
				 
			
 
				-struct xfs_dir2_leaf_map_info {
			
 
				-	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
			
 
				-	xfs_dablk_t	map_off;	/* last mapped file offset */
			
 
				-	int		map_size;	/* total entries in *map */
			
 
				-	int		map_valid;	/* valid entries in *map */
			
 
				-	int		nmap;		/* mappings to ask xfs_bmapi */
			
 
				-	xfs_dir2_db_t	curdb;		/* db for current block */
			
 
				-	int		ra_current;	/* number of read-ahead blks */
			
 
				-	int		ra_index;	/* *map index for read-ahead */
			
 
				-	int		ra_offset;	/* map entry offset for ra */
			
 
				-	int		ra_want;	/* readahead count wanted */
			
 
				-	struct xfs_bmbt_irec map[];	/* map vector for blocks */
			
 
				-};
			
 
				-
			
 
				-STATIC int
			
 
				-xfs_dir2_leaf_readbuf(
			
 
				-	struct xfs_inode	*dp,
			
 
				-	size_t			bufsize,
			
 
				-	struct xfs_dir2_leaf_map_info *mip,
			
 
				-	xfs_dir2_off_t		*curoff,
			
 
				-	struct xfs_buf		**bpp)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = dp->i_mount;
			
 
				-	struct xfs_buf		*bp = *bpp;
			
 
				-	struct xfs_bmbt_irec	*map = mip->map;
			
 
				-	struct blk_plug		plug;
			
 
				-	int			error = 0;
			
 
				-	int			length;
			
 
				-	int			i;
			
 
				-	int			j;
			
 
				-
			
 
				-	/*
			
 
				-	 * If we have a buffer, we need to release it and
			
 
				-	 * take it out of the mapping.
			
 
				-	 */
			
 
				-
			
 
				-	if (bp) {
			
 
				-		xfs_trans_brelse(NULL, bp);
			
 
				-		bp = NULL;
			
 
				-		mip->map_blocks -= mp->m_dirblkfsbs;
			
 
				-		/*
			
 
				-		 * Loop to get rid of the extents for the
			
 
				-		 * directory block.
			
 
				-		 */
			
 
				-		for (i = mp->m_dirblkfsbs; i > 0; ) {
			
 
				-			j = min_t(int, map->br_blockcount, i);
			
 
				-			map->br_blockcount -= j;
			
 
				-			map->br_startblock += j;
			
 
				-			map->br_startoff += j;
			
 
				-			/*
			
 
				-			 * If mapping is done, pitch it from
			
 
				-			 * the table.
			
 
				-			 */
			
 
				-			if (!map->br_blockcount && --mip->map_valid)
			
 
				-				memmove(&map[0], &map[1],
			
 
				-					sizeof(map[0]) * mip->map_valid);
			
 
				-			i -= j;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Recalculate the readahead blocks wanted.
			
 
				-	 */
			
 
				-	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
			
 
				-			       mp->m_sb.sb_blocksize) - 1;
			
 
				-	ASSERT(mip->ra_want >= 0);
			
 
				-
			
 
				-	/*
			
 
				-	 * If we don't have as many as we want, and we haven't
			
 
				-	 * run out of data blocks, get some more mappings.
			
 
				-	 */
			
 
				-	if (1 + mip->ra_want > mip->map_blocks &&
			
 
				-	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
			
 
				-		/*
			
 
				-		 * Get more bmaps, fill in after the ones
			
 
				-		 * we already have in the table.
			
 
				-		 */
			
 
				-		mip->nmap = mip->map_size - mip->map_valid;
			
 
				-		error = xfs_bmapi_read(dp, mip->map_off,
			
 
				-				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
			
 
				-								mip->map_off,
			
 
				-				&map[mip->map_valid], &mip->nmap, 0);
			
 
				-
			
 
				-		/*
			
 
				-		 * Don't know if we should ignore this or try to return an
			
 
				-		 * error.  The trouble with returning errors is that readdir
			
 
				-		 * will just stop without actually passing the error through.
			
 
				-		 */
			
 
				-		if (error)
			
 
				-			goto out;	/* XXX */
			
 
				-
			
 
				-		/*
			
 
				-		 * If we got all the mappings we asked for, set the final map
			
 
				-		 * offset based on the last bmap value received.  Otherwise,
			
 
				-		 * we've reached the end.
			
 
				-		 */
			
 
				-		if (mip->nmap == mip->map_size - mip->map_valid) {
			
 
				-			i = mip->map_valid + mip->nmap - 1;
			
 
				-			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
			
 
				-		} else
			
 
				-			mip->map_off = xfs_dir2_byte_to_da(mp,
			
 
				-							XFS_DIR2_LEAF_OFFSET);
			
 
				-
			
 
				-		/*
			
 
				-		 * Look for holes in the mapping, and eliminate them.  Count up
			
 
				-		 * the valid blocks.
			
 
				-		 */
			
 
				-		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
			
 
				-			if (map[i].br_startblock == HOLESTARTBLOCK) {
			
 
				-				mip->nmap--;
			
 
				-				length = mip->map_valid + mip->nmap - i;
			
 
				-				if (length)
			
 
				-					memmove(&map[i], &map[i + 1],
			
 
				-						sizeof(map[i]) * length);
			
 
				-			} else {
			
 
				-				mip->map_blocks += map[i].br_blockcount;
			
 
				-				i++;
			
 
				-			}
			
 
				-		}
			
 
				-		mip->map_valid += mip->nmap;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * No valid mappings, so no more data blocks.
			
 
				-	 */
			
 
				-	if (!mip->map_valid) {
			
 
				-		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Read the directory block starting at the first mapping.
			
 
				-	 */
			
 
				-	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
			
 
				-	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
			
 
				-			map->br_blockcount >= mp->m_dirblkfsbs ?
			
 
				-			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
			
 
				-
			
 
				-	/*
			
 
				-	 * Should just skip over the data block instead of giving up.
			
 
				-	 */
			
 
				-	if (error)
			
 
				-		goto out;	/* XXX */
			
 
				-
			
 
				-	/*
			
 
				-	 * Adjust the current amount of read-ahead: we just read a block that
			
 
				-	 * was previously ra.
			
 
				-	 */
			
 
				-	if (mip->ra_current)
			
 
				-		mip->ra_current -= mp->m_dirblkfsbs;
			
 
				-
			
 
				-	/*
			
 
				-	 * Do we need more readahead?
			
 
				-	 */
			
 
				-	blk_start_plug(&plug);
			
 
				-	for (mip->ra_index = mip->ra_offset = i = 0;
			
 
				-	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
			
 
				-	     i += mp->m_dirblkfsbs) {
			
 
				-		ASSERT(mip->ra_index < mip->map_valid);
			
 
				-		/*
			
 
				-		 * Read-ahead a contiguous directory block.
			
 
				-		 */
			
 
				-		if (i > mip->ra_current &&
			
 
				-		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
			
 
				-			xfs_dir3_data_readahead(NULL, dp,
			
 
				-				map[mip->ra_index].br_startoff + mip->ra_offset,
			
 
				-				XFS_FSB_TO_DADDR(mp,
			
 
				-					map[mip->ra_index].br_startblock +
			
 
				-							mip->ra_offset));
			
 
				-			mip->ra_current = i;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Read-ahead a non-contiguous directory block.  This doesn't
			
 
				-		 * use our mapping, but this is a very rare case.
			
 
				-		 */
			
 
				-		else if (i > mip->ra_current) {
			
 
				-			xfs_dir3_data_readahead(NULL, dp,
			
 
				-					map[mip->ra_index].br_startoff +
			
 
				-							mip->ra_offset, -1);
			
 
				-			mip->ra_current = i;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Advance offset through the mapping table.
			
 
				-		 */
			
 
				-		for (j = 0; j < mp->m_dirblkfsbs; j++) {
			
 
				-			/*
			
 
				-			 * The rest of this extent but not more than a dir
			
 
				-			 * block.
			
 
				-			 */
			
 
				-			length = min_t(int, mp->m_dirblkfsbs,
			
 
				-					map[mip->ra_index].br_blockcount -
			
 
				-							mip->ra_offset);
			
 
				-			j += length;
			
 
				-			mip->ra_offset += length;
			
 
				-
			
 
				-			/*
			
 
				-			 * Advance to the next mapping if this one is used up.
			
 
				-			 */
			
 
				-			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
			
 
				-				mip->ra_offset = 0;
			
 
				-				mip->ra_index++;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	blk_finish_plug(&plug);
			
 
				-
			
 
				-out:
			
 
				-	*bpp = bp;
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Getdents (readdir) for leaf and node directories.
			
 
				- * This reads the data blocks only, so is the same for both forms.
			
 
				- */
			
 
				-int						/* error */
			
 
				-xfs_dir2_leaf_getdents(
			
 
				-	xfs_inode_t		*dp,		/* incore directory inode */
			
 
				-	struct dir_context	*ctx,
			
 
				-	size_t			bufsize)
			
 
				-{
			
 
				-	struct xfs_buf		*bp = NULL;	/* data block buffer */
			
 
				-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
			
 
				-	xfs_dir2_data_entry_t	*dep;		/* data entry */
			
 
				-	xfs_dir2_data_unused_t	*dup;		/* unused entry */
			
 
				-	int			error = 0;	/* error return value */
			
 
				-	int			length;		/* temporary length value */
			
 
				-	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				-	int			byteoff;	/* offset in current block */
			
 
				-	xfs_dir2_off_t		curoff;		/* current overall offset */
			
 
				-	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
			
 
				-	char			*ptr = NULL;	/* pointer to current data */
			
 
				-	struct xfs_dir2_leaf_map_info *map_info;
			
 
				-
			
 
				-	/*
			
 
				-	 * If the offset is at or past the largest allowed value,
			
 
				-	 * give up right away.
			
 
				-	 */
			
 
				-	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
			
 
				-		return 0;
			
 
				-
			
 
				-	mp = dp->i_mount;
			
 
				-
			
 
				-	/*
			
 
				-	 * Set up to bmap a number of blocks based on the caller's
			
 
				-	 * buffer size, the directory block size, and the filesystem
			
 
				-	 * block size.
			
 
				-	 */
			
 
				-	length = howmany(bufsize + mp->m_dirblksize,
			
 
				-				     mp->m_sb.sb_blocksize);
			
 
				-	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
			
 
				-				(length * sizeof(struct xfs_bmbt_irec)),
			
 
				-			       KM_SLEEP | KM_NOFS);
			
 
				-	map_info->map_size = length;
			
 
				-
			
 
				-	/*
			
 
				-	 * Inside the loop we keep the main offset value as a byte offset
			
 
				-	 * in the directory file.
			
 
				-	 */
			
 
				-	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
			
 
				-
			
 
				-	/*
			
 
				-	 * Force this conversion through db so we truncate the offset
			
 
				-	 * down to get the start of the data block.
			
 
				-	 */
			
 
				-	map_info->map_off = xfs_dir2_db_to_da(mp,
			
 
				-					      xfs_dir2_byte_to_db(mp, curoff));
			
 
				-
			
 
				-	/*
			
 
				-	 * Loop over directory entries until we reach the end offset.
			
 
				-	 * Get more blocks and readahead as necessary.
			
 
				-	 */
			
 
				-	while (curoff < XFS_DIR2_LEAF_OFFSET) {
			
 
				-		/*
			
 
				-		 * If we have no buffer, or we're off the end of the
			
 
				-		 * current buffer, need to get another one.
			
 
				-		 */
			
 
				-		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
			
 
				-
			
 
				-			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
			
 
				-						      &curoff, &bp);
			
 
				-			if (error || !map_info->map_valid)
			
 
				-				break;
			
 
				-
			
 
				-			/*
			
 
				-			 * Having done a read, we need to set a new offset.
			
 
				-			 */
			
 
				-			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
			
 
				-			/*
			
 
				-			 * Start of the current block.
			
 
				-			 */
			
 
				-			if (curoff < newoff)
			
 
				-				curoff = newoff;
			
 
				-			/*
			
 
				-			 * Make sure we're in the right block.
			
 
				-			 */
			
 
				-			else if (curoff > newoff)
			
 
				-				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
			
 
				-				       map_info->curdb);
			
 
				-			hdr = bp->b_addr;
			
 
				-			xfs_dir3_data_check(dp, bp);
			
 
				-			/*
			
 
				-			 * Find our position in the block.
			
 
				-			 */
			
 
				-			ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				-			byteoff = xfs_dir2_byte_to_off(mp, curoff);
			
 
				-			/*
			
 
				-			 * Skip past the header.
			
 
				-			 */
			
 
				-			if (byteoff == 0)
			
 
				-				curoff += xfs_dir3_data_entry_offset(hdr);
			
 
				-			/*
			
 
				-			 * Skip past entries until we reach our offset.
			
 
				-			 */
			
 
				-			else {
			
 
				-				while ((char *)ptr - (char *)hdr < byteoff) {
			
 
				-					dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				-
			
 
				-					if (be16_to_cpu(dup->freetag)
			
 
				-						  == XFS_DIR2_DATA_FREE_TAG) {
			
 
				-
			
 
				-						length = be16_to_cpu(dup->length);
			
 
				-						ptr += length;
			
 
				-						continue;
			
 
				-					}
			
 
				-					dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				-					length =
			
 
				-					   xfs_dir2_data_entsize(dep->namelen);
			
 
				-					ptr += length;
			
 
				-				}
			
 
				-				/*
			
 
				-				 * Now set our real offset.
			
 
				-				 */
			
 
				-				curoff =
			
 
				-					xfs_dir2_db_off_to_byte(mp,
			
 
				-					    xfs_dir2_byte_to_db(mp, curoff),
			
 
				-					    (char *)ptr - (char *)hdr);
			
 
				-				if (ptr >= (char *)hdr + mp->m_dirblksize) {
			
 
				-					continue;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-		/*
			
 
				-		 * We have a pointer to an entry.
			
 
				-		 * Is it a live one?
			
 
				-		 */
			
 
				-		dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				-		/*
			
 
				-		 * No, it's unused, skip over it.
			
 
				-		 */
			
 
				-		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			
 
				-			length = be16_to_cpu(dup->length);
			
 
				-			ptr += length;
			
 
				-			curoff += length;
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				-		length = xfs_dir2_data_entsize(dep->namelen);
			
 
				-
			
 
				-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
			
 
				-		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			
 
				-			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
			
 
				-			break;
			
 
				-
			
 
				-		/*
			
 
				-		 * Advance to next entry in the block.
			
 
				-		 */
			
 
				-		ptr += length;
			
 
				-		curoff += length;
			
 
				-		/* bufsize may have just been a guess; don't go negative */
			
 
				-		bufsize = bufsize > length ? bufsize - length : 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * All done.  Set output offset value to current offset.
			
 
				-	 */
			
 
				-	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
			
 
				-		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
			
 
				-	else
			
 
				-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
			
 
				-	kmem_free(map_info);
			
 
				-	if (bp)
			
 
				-		xfs_trans_brelse(NULL, bp);
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-
			
 
				 /*
			
 
				  * Log the bests entries indicated from a leaf1 block.
			
 
				  */
			
@@ -1614,6 +1226,7 @@ xfs_dir2_leaf_lookup(
 
				 	 * Return the found inode number & CI name if appropriate
			
 
				 	 */
			
 
				 	args->inumber = be64_to_cpu(dep->inumber);
			
 
				+	args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep);
			
 
				 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
			
 
				 	xfs_trans_brelse(tp, dbp);
			
 
				 	xfs_trans_brelse(tp, lbp);
			
@@ -1816,7 +1429,7 @@ xfs_dir2_leaf_removename(
 
				 	 */
			
 
				 	xfs_dir2_data_make_free(tp, dbp,
			
 
				 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
			
 
				-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
			
 
				+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
			
 
				 	/*
			
 
				 	 * We just mark the leaf entry stale by putting a null in it.
			
 
				 	 */
			
@@ -1944,6 +1557,7 @@ xfs_dir2_leaf_replace(
 
				 	 * Put the new inode number in, log it.
			
 
				 	 */
			
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				+	xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype);
			
 
				 	tp = args->trans;
			
 
				 	xfs_dir2_data_log_entry(tp, dbp, dep);
			
 
				 	xfs_dir3_leaf_check(dp->i_mount, lbp);
			
@@ -1975,10 +1589,6 @@ xfs_dir2_leaf_search_hash(
 
				 	ents = xfs_dir3_leaf_ents_p(leaf);
			
 
				 	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				 
			
 
				-#ifndef __KERNEL__
			
 
				-	if (!leafhdr.count)
			
 
				-		return 0;
			
 
				-#endif
			
 
				 	/*
			
 
				 	 * Note, the table cannot be empty, so we have to go through the loop.
			
 
				 	 * Binary search the leaf entries looking for our hash value.
			
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -30,6 +30,7 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_bmap.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
@@ -312,11 +313,13 @@ xfs_dir2_free_log_header(
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				+#ifdef DEBUG
			
 
				 	xfs_dir2_free_t		*free;		/* freespace structure */
			
 
				 
			
 
				 	free = bp->b_addr;
			
 
				 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
			
 
				 	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
			
 
				+#endif
			
 
				 	xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1);
			
 
				 }
			
 
				 
			
@@ -602,7 +605,7 @@ xfs_dir2_leafn_lookup_for_addname(
 
				 		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
			
 
				 		       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
			
 
				 	}
			
 
				-	length = xfs_dir2_data_entsize(args->namelen);
			
 
				+	length = xfs_dir3_data_entsize(mp, args->namelen);
			
 
				 	/*
			
 
				 	 * Loop over leaf entries with the right hash value.
			
 
				 	 */
			
@@ -813,6 +816,7 @@ xfs_dir2_leafn_lookup_for_entry(
 
				 				xfs_trans_brelse(tp, state->extrablk.bp);
			
 
				 			args->cmpresult = cmp;
			
 
				 			args->inumber = be64_to_cpu(dep->inumber);
			
 
				+			args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
			
 
				 			*indexp = index;
			
 
				 			state->extravalid = 1;
			
 
				 			state->extrablk.bp = curbp;
			
@@ -1256,7 +1260,7 @@ xfs_dir2_leafn_remove(
 
				 	longest = be16_to_cpu(bf[0].length);
			
 
				 	needlog = needscan = 0;
			
 
				 	xfs_dir2_data_make_free(tp, dbp, off,
			
 
				-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
			
 
				+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
			
 
				 	/*
			
 
				 	 * Rescan the data block freespaces for bestfree.
			
 
				 	 * Log the data block header if needed.
			
@@ -1708,7 +1712,7 @@ xfs_dir2_node_addname_int(
 
				 	dp = args->dp;
			
 
				 	mp = dp->i_mount;
			
 
				 	tp = args->trans;
			
 
				-	length = xfs_dir2_data_entsize(args->namelen);
			
 
				+	length = xfs_dir3_data_entsize(mp, args->namelen);
			
 
				 	/*
			
 
				 	 * If we came in with a freespace block that means that lookup
			
 
				 	 * found an entry with our hash value.  This is the freespace
			
@@ -2004,7 +2008,8 @@ xfs_dir2_node_addname_int(
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				 	dep->namelen = args->namelen;
			
 
				 	memcpy(dep->name, args->name, dep->namelen);
			
 
				-	tagp = xfs_dir2_data_entry_tag_p(dep);
			
 
				+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
			
 
				+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
			
 
				 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
			
 
				 	xfs_dir2_data_log_entry(tp, dbp, dep);
			
 
				 	/*
			
@@ -2224,6 +2229,7 @@ xfs_dir2_node_replace(
 
				 		 * Fill in the new inode number and log the entry.
			
 
				 		 */
			
 
				 		dep->inumber = cpu_to_be64(inum);
			
 
				+		xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype);
			
 
				 		xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
			
 
				 		rval = 0;
			
 
				 	}
			
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -18,23 +18,26 @@
 
				 #ifndef __XFS_DIR2_PRIV_H__
			
 
				 #define __XFS_DIR2_PRIV_H__
			
 
				 
			
 
				+struct dir_context;
			
 
				+
			
 
				 /* xfs_dir2.c */
			
 
				 extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
			
 
				-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
			
 
				-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
			
 
				 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
			
 
				 				xfs_dir2_db_t *dbp);
			
 
				-extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
			
 
				-				struct xfs_buf *bp);
			
 
				 extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
			
 
				 				const unsigned char *name, int len);
			
 
				 
			
 
				-/* xfs_dir2_block.c */
			
 
				-extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
			
 
				+#define S_SHIFT 12
			
 
				+extern const unsigned char xfs_mode_to_ftype[];
			
 
				+
			
 
				+extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp,
			
 
				+					__uint8_t filetype);
			
 
				 
			
 
				+
			
 
				+/* xfs_dir2_block.c */
			
 
				+extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+			       struct xfs_buf **bpp);
			
 
				 extern int xfs_dir2_block_addname(struct xfs_da_args *args);
			
 
				-extern int xfs_dir2_block_getdents(struct xfs_inode *dp,
			
 
				-		struct dir_context *ctx);
			
 
				 extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_block_removename(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_block_replace(struct xfs_da_args *args);
			
@@ -48,9 +51,6 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
 
				 #define	xfs_dir3_data_check(dp,bp)
			
 
				 #endif
			
 
				 
			
 
				-extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
			
 
				-extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
			
 
				-
			
 
				 extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
			
 
				 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
			
@@ -60,27 +60,10 @@ extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
 
				 extern struct xfs_dir2_data_free *
			
 
				 xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
			
 
				 		struct xfs_dir2_data_unused *dup, int *loghead);
			
 
				-extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
			
 
				-		struct xfs_dir2_data_hdr *hdr, int *loghead);
			
 
				 extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
			
 
				 		struct xfs_buf **bpp);
			
 
				-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				-		struct xfs_dir2_data_entry *dep);
			
 
				-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
			
 
				-		struct xfs_buf *bp);
			
 
				-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				-		struct xfs_dir2_data_unused *dup);
			
 
				-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				-		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
			
 
				-		int *needlogp, int *needscanp);
			
 
				-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				-		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
			
 
				-		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
			
 
				 
			
 
				 /* xfs_dir2_leaf.c */
			
 
				-extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
			
 
				-extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
			
 
				-
			
 
				 extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
			
 
				 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
			
@@ -91,8 +74,6 @@ extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
 
				 extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
			
 
				 		struct xfs_dir2_leaf_entry *ents, int *indexp,
			
 
				 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
			
 
				-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, struct dir_context *ctx,
			
 
				-		size_t bufsize);
			
 
				 extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
			
 
				 		struct xfs_buf **bpp, __uint16_t magic);
			
 
				 extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
			
@@ -144,18 +125,18 @@ extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
 
				 		xfs_dablk_t fbno, struct xfs_buf **bpp);
			
 
				 
			
 
				 /* xfs_dir2_sf.c */
			
 
				-extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
			
 
				-extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
			
 
				-		struct xfs_dir2_sf_entry *sfep);
			
 
				 extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
			
 
				 		struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
			
 
				 extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
			
 
				 		int size, xfs_dir2_sf_hdr_t *sfhp);
			
 
				 extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
			
 
				-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct dir_context *ctx);
			
 
				 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
			
 
				 
			
 
				+/* xfs_dir2_readdir.c */
			
 
				+extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
			
 
				+		       size_t bufsize);
			
 
				+
			
 
				 #endif /* __XFS_DIR2_PRIV_H__ */
			
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -0,0 +1,695 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_types.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				+#include "xfs_dir2_priv.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+
			
 
				+/*
			
 
				+ * Directory file type support functions
			
 
				+ */
			
 
				+static unsigned char xfs_dir3_filetype_table[] = {
			
 
				+	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK,
			
 
				+	DT_FIFO, DT_SOCK, DT_LNK, DT_WHT,
			
 
				+};
			
 
				+
			
 
				+unsigned char
			
 
				+xfs_dir3_get_dtype(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	__uint8_t		filetype)
			
 
				+{
			
 
				+	if (!xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		return DT_UNKNOWN;
			
 
				+
			
 
				+	if (filetype >= XFS_DIR3_FT_MAX)
			
 
				+		return DT_UNKNOWN;
			
 
				+
			
 
				+	return xfs_dir3_filetype_table[filetype];
			
 
				+}
			
 
				+/*
			
 
				+ * @mode, if set, indicates that the type field needs to be set up.
			
 
				+ * This uses the transformation from file mode to DT_* as defined in linux/fs.h
			
 
				+ * for file type specification. This will be propagated into the directory
			
 
				+ * structure if appropriate for the given operation and filesystem config.
			
 
				+ */
			
 
				+const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {
			
 
				+	[0]			= XFS_DIR3_FT_UNKNOWN,
			
 
				+	[S_IFREG >> S_SHIFT]    = XFS_DIR3_FT_REG_FILE,
			
 
				+	[S_IFDIR >> S_SHIFT]    = XFS_DIR3_FT_DIR,
			
 
				+	[S_IFCHR >> S_SHIFT]    = XFS_DIR3_FT_CHRDEV,
			
 
				+	[S_IFBLK >> S_SHIFT]    = XFS_DIR3_FT_BLKDEV,
			
 
				+	[S_IFIFO >> S_SHIFT]    = XFS_DIR3_FT_FIFO,
			
 
				+	[S_IFSOCK >> S_SHIFT]   = XFS_DIR3_FT_SOCK,
			
 
				+	[S_IFLNK >> S_SHIFT]    = XFS_DIR3_FT_SYMLINK,
			
 
				+};
			
 
				+
			
 
				+STATIC int
			
 
				+xfs_dir2_sf_getdents(
			
 
				+	xfs_inode_t		*dp,		/* incore directory inode */
			
 
				+	struct dir_context	*ctx)
			
 
				+{
			
 
				+	int			i;		/* shortform entry number */
			
 
				+	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				+	xfs_dir2_dataptr_t	off;		/* current entry's offset */
			
 
				+	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
			
 
				+	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
			
 
				+	xfs_dir2_dataptr_t	dot_offset;
			
 
				+	xfs_dir2_dataptr_t	dotdot_offset;
			
 
				+	xfs_ino_t		ino;
			
 
				+
			
 
				+	mp = dp->i_mount;
			
 
				+
			
 
				+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
			
 
				+	/*
			
 
				+	 * Give up if the directory is way too short.
			
 
				+	 */
			
 
				+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
			
 
				+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				+		return XFS_ERROR(EIO);
			
 
				+	}
			
 
				+
			
 
				+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
			
 
				+	ASSERT(dp->i_df.if_u1.if_data != NULL);
			
 
				+
			
 
				+	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				+
			
 
				+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
			
 
				+
			
 
				+	/*
			
 
				+	 * If the block number in the offset is out of range, we're done.
			
 
				+	 */
			
 
				+	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Precalculate offsets for . and .. as we will always need them.
			
 
				+	 *
			
 
				+	 * XXX(hch): the second argument is sometimes 0 and sometimes
			
 
				+	 * mp->m_dirdatablk.
			
 
				+	 */
			
 
				+	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				+					     XFS_DIR3_DATA_DOT_OFFSET(mp));
			
 
				+	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				+						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
			
 
				+
			
 
				+	/*
			
 
				+	 * Put . entry unless we're starting past it.
			
 
				+	 */
			
 
				+	if (ctx->pos <= dot_offset) {
			
 
				+		ctx->pos = dot_offset & 0x7fffffff;
			
 
				+		if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))
			
 
				+			return 0;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Put .. entry unless we're starting past it.
			
 
				+	 */
			
 
				+	if (ctx->pos <= dotdot_offset) {
			
 
				+		ino = xfs_dir2_sf_get_parent_ino(sfp);
			
 
				+		ctx->pos = dotdot_offset & 0x7fffffff;
			
 
				+		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
			
 
				+			return 0;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Loop while there are more entries and put'ing works.
			
 
				+	 */
			
 
				+	sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				+	for (i = 0; i < sfp->count; i++) {
			
 
				+		__uint8_t filetype;
			
 
				+
			
 
				+		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				+				xfs_dir2_sf_get_offset(sfep));
			
 
				+
			
 
				+		if (ctx->pos > off) {
			
 
				+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
			
 
				+		filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep);
			
 
				+		ctx->pos = off & 0x7fffffff;
			
 
				+		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
			
 
				+			    xfs_dir3_get_dtype(mp, filetype)))
			
 
				+			return 0;
			
 
				+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				+	}
			
 
				+
			
 
				+	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
			
 
				+			0x7fffffff;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Readdir for block directories.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_dir2_block_getdents(
			
 
				+	xfs_inode_t		*dp,		/* incore inode */
			
 
				+	struct dir_context	*ctx)
			
 
				+{
			
 
				+	xfs_dir2_data_hdr_t	*hdr;		/* block header */
			
 
				+	struct xfs_buf		*bp;		/* buffer for block */
			
 
				+	xfs_dir2_block_tail_t	*btp;		/* block tail */
			
 
				+	xfs_dir2_data_entry_t	*dep;		/* block data entry */
			
 
				+	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
			
 
				+	char			*endptr;	/* end of the data entries */
			
 
				+	int			error;		/* error return value */
			
 
				+	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				+	char			*ptr;		/* current data entry */
			
 
				+	int			wantoff;	/* starting block offset */
			
 
				+	xfs_off_t		cook;
			
 
				+
			
 
				+	mp = dp->i_mount;
			
 
				+	/*
			
 
				+	 * If the block number in the offset is out of range, we're done.
			
 
				+	 */
			
 
				+	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
			
 
				+		return 0;
			
 
				+
			
 
				+	error = xfs_dir3_block_read(NULL, dp, &bp);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	/*
			
 
				+	 * Extract the byte offset we start at from the seek pointer.
			
 
				+	 * We'll skip entries before this.
			
 
				+	 */
			
 
				+	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
			
 
				+	hdr = bp->b_addr;
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				+	/*
			
 
				+	 * Set up values for the loop.
			
 
				+	 */
			
 
				+	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				+	ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				+
			
 
				+	/*
			
 
				+	 * Loop over the data portion of the block.
			
 
				+	 * Each object is a real entry (dep) or an unused one (dup).
			
 
				+	 */
			
 
				+	while (ptr < endptr) {
			
 
				+		__uint8_t filetype;
			
 
				+
			
 
				+		dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				+		/*
			
 
				+		 * Unused, skip it.
			
 
				+		 */
			
 
				+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			
 
				+			ptr += be16_to_cpu(dup->length);
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				+
			
 
				+		/*
			
 
				+		 * Bump pointer for the next iteration.
			
 
				+		 */
			
 
				+		ptr += xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				+		/*
			
 
				+		 * The entry is before the desired starting point, skip it.
			
 
				+		 */
			
 
				+		if ((char *)dep - (char *)hdr < wantoff)
			
 
				+			continue;
			
 
				+
			
 
				+		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				+					    (char *)dep - (char *)hdr);
			
 
				+
			
 
				+		ctx->pos = cook & 0x7fffffff;
			
 
				+		filetype = xfs_dir3_dirent_get_ftype(mp, dep);
			
 
				+		/*
			
 
				+		 * If it didn't fit, set the final offset to here & return.
			
 
				+		 */
			
 
				+		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			
 
				+			    be64_to_cpu(dep->inumber),
			
 
				+			    xfs_dir3_get_dtype(mp, filetype))) {
			
 
				+			xfs_trans_brelse(NULL, bp);
			
 
				+			return 0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Reached the end of the block.
			
 
				+	 * Set the offset to a non-existent block 1 and return.
			
 
				+	 */
			
 
				+	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
			
 
				+			0x7fffffff;
			
 
				+	xfs_trans_brelse(NULL, bp);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+struct xfs_dir2_leaf_map_info {
			
 
				+	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
			
 
				+	xfs_dablk_t	map_off;	/* last mapped file offset */
			
 
				+	int		map_size;	/* total entries in *map */
			
 
				+	int		map_valid;	/* valid entries in *map */
			
 
				+	int		nmap;		/* mappings to ask xfs_bmapi */
			
 
				+	xfs_dir2_db_t	curdb;		/* db for current block */
			
 
				+	int		ra_current;	/* number of read-ahead blks */
			
 
				+	int		ra_index;	/* *map index for read-ahead */
			
 
				+	int		ra_offset;	/* map entry offset for ra */
			
 
				+	int		ra_want;	/* readahead count wanted */
			
 
				+	struct xfs_bmbt_irec map[];	/* map vector for blocks */
			
 
				+};
			
 
				+
			
 
				+STATIC int
			
 
				+xfs_dir2_leaf_readbuf(
			
 
				+	struct xfs_inode	*dp,
			
 
				+	size_t			bufsize,
			
 
				+	struct xfs_dir2_leaf_map_info *mip,
			
 
				+	xfs_dir2_off_t		*curoff,
			
 
				+	struct xfs_buf		**bpp)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = dp->i_mount;
			
 
				+	struct xfs_buf		*bp = *bpp;
			
 
				+	struct xfs_bmbt_irec	*map = mip->map;
			
 
				+	struct blk_plug		plug;
			
 
				+	int			error = 0;
			
 
				+	int			length;
			
 
				+	int			i;
			
 
				+	int			j;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we have a buffer, we need to release it and
			
 
				+	 * take it out of the mapping.
			
 
				+	 */
			
 
				+
			
 
				+	if (bp) {
			
 
				+		xfs_trans_brelse(NULL, bp);
			
 
				+		bp = NULL;
			
 
				+		mip->map_blocks -= mp->m_dirblkfsbs;
			
 
				+		/*
			
 
				+		 * Loop to get rid of the extents for the
			
 
				+		 * directory block.
			
 
				+		 */
			
 
				+		for (i = mp->m_dirblkfsbs; i > 0; ) {
			
 
				+			j = min_t(int, map->br_blockcount, i);
			
 
				+			map->br_blockcount -= j;
			
 
				+			map->br_startblock += j;
			
 
				+			map->br_startoff += j;
			
 
				+			/*
			
 
				+			 * If mapping is done, pitch it from
			
 
				+			 * the table.
			
 
				+			 */
			
 
				+			if (!map->br_blockcount && --mip->map_valid)
			
 
				+				memmove(&map[0], &map[1],
			
 
				+					sizeof(map[0]) * mip->map_valid);
			
 
				+			i -= j;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Recalculate the readahead blocks wanted.
			
 
				+	 */
			
 
				+	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
			
 
				+			       mp->m_sb.sb_blocksize) - 1;
			
 
				+	ASSERT(mip->ra_want >= 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * If we don't have as many as we want, and we haven't
			
 
				+	 * run out of data blocks, get some more mappings.
			
 
				+	 */
			
 
				+	if (1 + mip->ra_want > mip->map_blocks &&
			
 
				+	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
			
 
				+		/*
			
 
				+		 * Get more bmaps, fill in after the ones
			
 
				+		 * we already have in the table.
			
 
				+		 */
			
 
				+		mip->nmap = mip->map_size - mip->map_valid;
			
 
				+		error = xfs_bmapi_read(dp, mip->map_off,
			
 
				+				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
			
 
				+								mip->map_off,
			
 
				+				&map[mip->map_valid], &mip->nmap, 0);
			
 
				+
			
 
				+		/*
			
 
				+		 * Don't know if we should ignore this or try to return an
			
 
				+		 * error.  The trouble with returning errors is that readdir
			
 
				+		 * will just stop without actually passing the error through.
			
 
				+		 */
			
 
				+		if (error)
			
 
				+			goto out;	/* XXX */
			
 
				+
			
 
				+		/*
			
 
				+		 * If we got all the mappings we asked for, set the final map
			
 
				+		 * offset based on the last bmap value received.  Otherwise,
			
 
				+		 * we've reached the end.
			
 
				+		 */
			
 
				+		if (mip->nmap == mip->map_size - mip->map_valid) {
			
 
				+			i = mip->map_valid + mip->nmap - 1;
			
 
				+			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
			
 
				+		} else
			
 
				+			mip->map_off = xfs_dir2_byte_to_da(mp,
			
 
				+							XFS_DIR2_LEAF_OFFSET);
			
 
				+
			
 
				+		/*
			
 
				+		 * Look for holes in the mapping, and eliminate them.  Count up
			
 
				+		 * the valid blocks.
			
 
				+		 */
			
 
				+		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
			
 
				+			if (map[i].br_startblock == HOLESTARTBLOCK) {
			
 
				+				mip->nmap--;
			
 
				+				length = mip->map_valid + mip->nmap - i;
			
 
				+				if (length)
			
 
				+					memmove(&map[i], &map[i + 1],
			
 
				+						sizeof(map[i]) * length);
			
 
				+			} else {
			
 
				+				mip->map_blocks += map[i].br_blockcount;
			
 
				+				i++;
			
 
				+			}
			
 
				+		}
			
 
				+		mip->map_valid += mip->nmap;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * No valid mappings, so no more data blocks.
			
 
				+	 */
			
 
				+	if (!mip->map_valid) {
			
 
				+		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Read the directory block starting at the first mapping.
			
 
				+	 */
			
 
				+	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
			
 
				+	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
			
 
				+			map->br_blockcount >= mp->m_dirblkfsbs ?
			
 
				+			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
			
 
				+
			
 
				+	/*
			
 
				+	 * Should just skip over the data block instead of giving up.
			
 
				+	 */
			
 
				+	if (error)
			
 
				+		goto out;	/* XXX */
			
 
				+
			
 
				+	/*
			
 
				+	 * Adjust the current amount of read-ahead: we just read a block that
			
 
				+	 * was previously ra.
			
 
				+	 */
			
 
				+	if (mip->ra_current)
			
 
				+		mip->ra_current -= mp->m_dirblkfsbs;
			
 
				+
			
 
				+	/*
			
 
				+	 * Do we need more readahead?
			
 
				+	 */
			
 
				+	blk_start_plug(&plug);
			
 
				+	for (mip->ra_index = mip->ra_offset = i = 0;
			
 
				+	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
			
 
				+	     i += mp->m_dirblkfsbs) {
			
 
				+		ASSERT(mip->ra_index < mip->map_valid);
			
 
				+		/*
			
 
				+		 * Read-ahead a contiguous directory block.
			
 
				+		 */
			
 
				+		if (i > mip->ra_current &&
			
 
				+		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
			
 
				+			xfs_dir3_data_readahead(NULL, dp,
			
 
				+				map[mip->ra_index].br_startoff + mip->ra_offset,
			
 
				+				XFS_FSB_TO_DADDR(mp,
			
 
				+					map[mip->ra_index].br_startblock +
			
 
				+							mip->ra_offset));
			
 
				+			mip->ra_current = i;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Read-ahead a non-contiguous directory block.  This doesn't
			
 
				+		 * use our mapping, but this is a very rare case.
			
 
				+		 */
			
 
				+		else if (i > mip->ra_current) {
			
 
				+			xfs_dir3_data_readahead(NULL, dp,
			
 
				+					map[mip->ra_index].br_startoff +
			
 
				+							mip->ra_offset, -1);
			
 
				+			mip->ra_current = i;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Advance offset through the mapping table.
			
 
				+		 */
			
 
				+		for (j = 0; j < mp->m_dirblkfsbs; j++) {
			
 
				+			/*
			
 
				+			 * The rest of this extent but not more than a dir
			
 
				+			 * block.
			
 
				+			 */
			
 
				+			length = min_t(int, mp->m_dirblkfsbs,
			
 
				+					map[mip->ra_index].br_blockcount -
			
 
				+							mip->ra_offset);
			
 
				+			j += length;
			
 
				+			mip->ra_offset += length;
			
 
				+
			
 
				+			/*
			
 
				+			 * Advance to the next mapping if this one is used up.
			
 
				+			 */
			
 
				+			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
			
 
				+				mip->ra_offset = 0;
			
 
				+				mip->ra_index++;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	blk_finish_plug(&plug);
			
 
				+
			
 
				+out:
			
 
				+	*bpp = bp;
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Getdents (readdir) for leaf and node directories.
			
 
				+ * This reads the data blocks only, so is the same for both forms.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_dir2_leaf_getdents(
			
 
				+	xfs_inode_t		*dp,		/* incore directory inode */
			
 
				+	struct dir_context	*ctx,
			
 
				+	size_t			bufsize)
			
 
				+{
			
 
				+	struct xfs_buf		*bp = NULL;	/* data block buffer */
			
 
				+	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
			
 
				+	xfs_dir2_data_entry_t	*dep;		/* data entry */
			
 
				+	xfs_dir2_data_unused_t	*dup;		/* unused entry */
			
 
				+	int			error = 0;	/* error return value */
			
 
				+	int			length;		/* temporary length value */
			
 
				+	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				+	int			byteoff;	/* offset in current block */
			
 
				+	xfs_dir2_off_t		curoff;		/* current overall offset */
			
 
				+	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
			
 
				+	char			*ptr = NULL;	/* pointer to current data */
			
 
				+	struct xfs_dir2_leaf_map_info *map_info;
			
 
				+
			
 
				+	/*
			
 
				+	 * If the offset is at or past the largest allowed value,
			
 
				+	 * give up right away.
			
 
				+	 */
			
 
				+	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
			
 
				+		return 0;
			
 
				+
			
 
				+	mp = dp->i_mount;
			
 
				+
			
 
				+	/*
			
 
				+	 * Set up to bmap a number of blocks based on the caller's
			
 
				+	 * buffer size, the directory block size, and the filesystem
			
 
				+	 * block size.
			
 
				+	 */
			
 
				+	length = howmany(bufsize + mp->m_dirblksize,
			
 
				+				     mp->m_sb.sb_blocksize);
			
 
				+	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
			
 
				+				(length * sizeof(struct xfs_bmbt_irec)),
			
 
				+			       KM_SLEEP | KM_NOFS);
			
 
				+	map_info->map_size = length;
			
 
				+
			
 
				+	/*
			
 
				+	 * Inside the loop we keep the main offset value as a byte offset
			
 
				+	 * in the directory file.
			
 
				+	 */
			
 
				+	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
			
 
				+
			
 
				+	/*
			
 
				+	 * Force this conversion through db so we truncate the offset
			
 
				+	 * down to get the start of the data block.
			
 
				+	 */
			
 
				+	map_info->map_off = xfs_dir2_db_to_da(mp,
			
 
				+					      xfs_dir2_byte_to_db(mp, curoff));
			
 
				+
			
 
				+	/*
			
 
				+	 * Loop over directory entries until we reach the end offset.
			
 
				+	 * Get more blocks and readahead as necessary.
			
 
				+	 */
			
 
				+	while (curoff < XFS_DIR2_LEAF_OFFSET) {
			
 
				+		__uint8_t filetype;
			
 
				+
			
 
				+		/*
			
 
				+		 * If we have no buffer, or we're off the end of the
			
 
				+		 * current buffer, need to get another one.
			
 
				+		 */
			
 
				+		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
			
 
				+
			
 
				+			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
			
 
				+						      &curoff, &bp);
			
 
				+			if (error || !map_info->map_valid)
			
 
				+				break;
			
 
				+
			
 
				+			/*
			
 
				+			 * Having done a read, we need to set a new offset.
			
 
				+			 */
			
 
				+			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
			
 
				+			/*
			
 
				+			 * Start of the current block.
			
 
				+			 */
			
 
				+			if (curoff < newoff)
			
 
				+				curoff = newoff;
			
 
				+			/*
			
 
				+			 * Make sure we're in the right block.
			
 
				+			 */
			
 
				+			else if (curoff > newoff)
			
 
				+				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
			
 
				+				       map_info->curdb);
			
 
				+			hdr = bp->b_addr;
			
 
				+			xfs_dir3_data_check(dp, bp);
			
 
				+			/*
			
 
				+			 * Find our position in the block.
			
 
				+			 */
			
 
				+			ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				+			byteoff = xfs_dir2_byte_to_off(mp, curoff);
			
 
				+			/*
			
 
				+			 * Skip past the header.
			
 
				+			 */
			
 
				+			if (byteoff == 0)
			
 
				+				curoff += xfs_dir3_data_entry_offset(hdr);
			
 
				+			/*
			
 
				+			 * Skip past entries until we reach our offset.
			
 
				+			 */
			
 
				+			else {
			
 
				+				while ((char *)ptr - (char *)hdr < byteoff) {
			
 
				+					dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				+
			
 
				+					if (be16_to_cpu(dup->freetag)
			
 
				+						  == XFS_DIR2_DATA_FREE_TAG) {
			
 
				+
			
 
				+						length = be16_to_cpu(dup->length);
			
 
				+						ptr += length;
			
 
				+						continue;
			
 
				+					}
			
 
				+					dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				+					length =
			
 
				+					   xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				+					ptr += length;
			
 
				+				}
			
 
				+				/*
			
 
				+				 * Now set our real offset.
			
 
				+				 */
			
 
				+				curoff =
			
 
				+					xfs_dir2_db_off_to_byte(mp,
			
 
				+					    xfs_dir2_byte_to_db(mp, curoff),
			
 
				+					    (char *)ptr - (char *)hdr);
			
 
				+				if (ptr >= (char *)hdr + mp->m_dirblksize) {
			
 
				+					continue;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		/*
			
 
				+		 * We have a pointer to an entry.
			
 
				+		 * Is it a live one?
			
 
				+		 */
			
 
				+		dup = (xfs_dir2_data_unused_t *)ptr;
			
 
				+		/*
			
 
				+		 * No, it's unused, skip over it.
			
 
				+		 */
			
 
				+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			
 
				+			length = be16_to_cpu(dup->length);
			
 
				+			ptr += length;
			
 
				+			curoff += length;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		dep = (xfs_dir2_data_entry_t *)ptr;
			
 
				+		length = xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				+		filetype = xfs_dir3_dirent_get_ftype(mp, dep);
			
 
				+
			
 
				+		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
			
 
				+		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			
 
				+			    be64_to_cpu(dep->inumber),
			
 
				+			    xfs_dir3_get_dtype(mp, filetype)))
			
 
				+			break;
			
 
				+
			
 
				+		/*
			
 
				+		 * Advance to next entry in the block.
			
 
				+		 */
			
 
				+		ptr += length;
			
 
				+		curoff += length;
			
 
				+		/* bufsize may have just been a guess; don't go negative */
			
 
				+		bufsize = bufsize > length ? bufsize - length : 0;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * All done.  Set output offset value to current offset.
			
 
				+	 */
			
 
				+	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
			
 
				+		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
			
 
				+	else
			
 
				+		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
			
 
				+	kmem_free(map_info);
			
 
				+	if (bp)
			
 
				+		xfs_trans_brelse(NULL, bp);
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Read a directory.
			
 
				+ */
			
 
				+int
			
 
				+xfs_readdir(
			
 
				+	xfs_inode_t	*dp,
			
 
				+	struct dir_context *ctx,
			
 
				+	size_t		bufsize)
			
 
				+{
			
 
				+	int		rval;		/* return value */
			
 
				+	int		v;		/* type-checking value */
			
 
				+
			
 
				+	trace_xfs_readdir(dp);
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
			
 
				+		return XFS_ERROR(EIO);
			
 
				+
			
 
				+	ASSERT(S_ISDIR(dp->i_d.di_mode));
			
 
				+	XFS_STATS_INC(xs_dir_getdents);
			
 
				+
			
 
				+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
			
 
				+		rval = xfs_dir2_sf_getdents(dp, ctx);
			
 
				+	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
			
 
				+		;
			
 
				+	else if (v)
			
 
				+		rval = xfs_dir2_block_getdents(dp, ctx);
			
 
				+	else
			
 
				+		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
			
 
				+	return rval;
			
 
				+}
			
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -29,8 +29,8 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_error.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_trace.h"
			
 
				 
			
@@ -95,7 +95,7 @@ xfs_dir2_sf_get_parent_ino(
 
				 	return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+void
			
 
				 xfs_dir2_sf_put_parent_ino(
			
 
				 	struct xfs_dir2_sf_hdr	*hdr,
			
 
				 	xfs_ino_t		ino)
			
@@ -105,31 +105,38 @@ xfs_dir2_sf_put_parent_ino(
 
				 
			
 
				 /*
			
 
				  * In short-form directory entries the inode numbers are stored at variable
			
 
				- * offset behind the entry name.  The inode numbers may only be accessed
			
 
				- * through the helpers below.
			
 
				+ * offset behind the entry name. If the entry stores a filetype value, then it
			
 
				+ * sits between the name and the inode number. Hence the inode numbers may only
			
 
				+ * be accessed through the helpers below.
			
 
				  */
			
 
				 static xfs_dir2_inou_t *
			
 
				-xfs_dir2_sfe_inop(
			
 
				+xfs_dir3_sfe_inop(
			
 
				+	struct xfs_mount	*mp,
			
 
				 	struct xfs_dir2_sf_entry *sfep)
			
 
				 {
			
 
				-	return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
			
 
				+	__uint8_t	*ptr = &sfep->name[sfep->namelen];
			
 
				+	if (xfs_sb_version_hasftype(&mp->m_sb))
			
 
				+		ptr++;
			
 
				+	return (xfs_dir2_inou_t *)ptr;
			
 
				 }
			
 
				 
			
 
				 xfs_ino_t
			
 
				-xfs_dir2_sfe_get_ino(
			
 
				+xfs_dir3_sfe_get_ino(
			
 
				+	struct xfs_mount	*mp,
			
 
				 	struct xfs_dir2_sf_hdr	*hdr,
			
 
				 	struct xfs_dir2_sf_entry *sfep)
			
 
				 {
			
 
				-	return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
			
 
				+	return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep));
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-xfs_dir2_sfe_put_ino(
			
 
				+void
			
 
				+xfs_dir3_sfe_put_ino(
			
 
				+	struct xfs_mount	*mp,
			
 
				 	struct xfs_dir2_sf_hdr	*hdr,
			
 
				 	struct xfs_dir2_sf_entry *sfep,
			
 
				 	xfs_ino_t		ino)
			
 
				 {
			
 
				-	xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
			
 
				+	xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -157,9 +164,16 @@ xfs_dir2_block_sfsize(
 
				 	int			namelen;	/* total name bytes */
			
 
				 	xfs_ino_t		parent = 0;	/* parent inode number */
			
 
				 	int			size=0;		/* total computed size */
			
 
				+	int			has_ftype;
			
 
				 
			
 
				 	mp = dp->i_mount;
			
 
				 
			
 
				+	/*
			
 
				+	 * if there is a filetype field, add the extra byte to the namelen
			
 
				+	 * for each entry that we see.
			
 
				+	 */
			
 
				+	has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
			
 
				+
			
 
				 	count = i8count = namelen = 0;
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 	blp = xfs_dir2_block_leaf_p(btp);
			
@@ -188,9 +202,10 @@ xfs_dir2_block_sfsize(
 
				 		if (!isdot)
			
 
				 			i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
			
 
				 #endif
			
 
				+		/* take into account the file type field */
			
 
				 		if (!isdot && !isdotdot) {
			
 
				 			count++;
			
 
				-			namelen += dep->namelen;
			
 
				+			namelen += dep->namelen + has_ftype;
			
 
				 		} else if (isdotdot)
			
 
				 			parent = be64_to_cpu(dep->inumber);
			
 
				 		/*
			
@@ -316,12 +331,14 @@ xfs_dir2_block_to_sf(
 
				 				(xfs_dir2_data_aoff_t)
			
 
				 				((char *)dep - (char *)hdr));
			
 
				 			memcpy(sfep->name, dep->name, dep->namelen);
			
 
				-			xfs_dir2_sfe_put_ino(sfp, sfep,
			
 
				+			xfs_dir3_sfe_put_ino(mp, sfp, sfep,
			
 
				 					     be64_to_cpu(dep->inumber));
			
 
				+			xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
			
 
				+					xfs_dir3_dirent_get_ftype(mp, dep));
			
 
				 
			
 
				-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				 		}
			
 
				-		ptr += xfs_dir2_data_entsize(dep->namelen);
			
 
				+		ptr += xfs_dir3_data_entsize(mp, dep->namelen);
			
 
				 	}
			
 
				 	ASSERT((char *)sfep - (char *)sfp == size);
			
 
				 	xfs_dir2_sf_check(args);
			
@@ -372,7 +389,7 @@ xfs_dir2_sf_addname(
 
				 	/*
			
 
				 	 * Compute entry (and change in) size.
			
 
				 	 */
			
 
				-	add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
			
 
				+	add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
			
 
				 	incr_isize = add_entsize;
			
 
				 	objchange = 0;
			
 
				 #if XFS_BIG_INUMS
			
@@ -466,8 +483,9 @@ xfs_dir2_sf_addname_easy(
 
				 	/*
			
 
				 	 * Grow the in-inode space.
			
 
				 	 */
			
 
				-	xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
			
 
				-		XFS_DATA_FORK);
			
 
				+	xfs_idata_realloc(dp,
			
 
				+			  xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen),
			
 
				+			  XFS_DATA_FORK);
			
 
				 	/*
			
 
				 	 * Need to set up again due to realloc of the inode data.
			
 
				 	 */
			
@@ -479,7 +497,9 @@ xfs_dir2_sf_addname_easy(
 
				 	sfep->namelen = args->namelen;
			
 
				 	xfs_dir2_sf_put_offset(sfep, offset);
			
 
				 	memcpy(sfep->name, args->name, sfep->namelen);
			
 
				-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
			
 
				+	xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber);
			
 
				+	xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype);
			
 
				+
			
 
				 	/*
			
 
				 	 * Update the header and inode.
			
 
				 	 */
			
@@ -519,11 +539,13 @@ xfs_dir2_sf_addname_hard(
 
				 	xfs_dir2_sf_hdr_t	*oldsfp;	/* original shortform dir */
			
 
				 	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
			
 
				 	xfs_dir2_sf_hdr_t	*sfp;		/* new shortform dir */
			
 
				+	struct xfs_mount	*mp;
			
 
				 
			
 
				 	/*
			
 
				 	 * Copy the old directory to the stack buffer.
			
 
				 	 */
			
 
				 	dp = args->dp;
			
 
				+	mp = dp->i_mount;
			
 
				 
			
 
				 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				 	old_isize = (int)dp->i_d.di_size;
			
@@ -535,13 +557,13 @@ xfs_dir2_sf_addname_hard(
 
				 	 * to insert the new entry.
			
 
				 	 * If it's going to end up at the end then oldsfep will point there.
			
 
				 	 */
			
 
				-	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount),
			
 
				+	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp),
			
 
				 	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
			
 
				-	      add_datasize = xfs_dir2_data_entsize(args->namelen),
			
 
				+	      add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
			
 
				 	      eof = (char *)oldsfep == &buf[old_isize];
			
 
				 	     !eof;
			
 
				-	     offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
			
 
				-	      oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
			
 
				+	     offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen),
			
 
				+	      oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep),
			
 
				 	      eof = (char *)oldsfep == &buf[old_isize]) {
			
 
				 		new_offset = xfs_dir2_sf_get_offset(oldsfep);
			
 
				 		if (offset + add_datasize <= new_offset)
			
@@ -570,7 +592,8 @@ xfs_dir2_sf_addname_hard(
 
				 	sfep->namelen = args->namelen;
			
 
				 	xfs_dir2_sf_put_offset(sfep, offset);
			
 
				 	memcpy(sfep->name, args->name, sfep->namelen);
			
 
				-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
			
 
				+	xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber);
			
 
				+	xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype);
			
 
				 	sfp->count++;
			
 
				 #if XFS_BIG_INUMS
			
 
				 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
			
@@ -580,7 +603,7 @@ xfs_dir2_sf_addname_hard(
 
				 	 * If there's more left to copy, do that.
			
 
				 	 */
			
 
				 	if (!eof) {
			
 
				-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				 		memcpy(sfep, oldsfep, old_isize - nbytes);
			
 
				 	}
			
 
				 	kmem_free(buf);
			
@@ -616,7 +639,7 @@ xfs_dir2_sf_addname_pick(
 
				 	mp = dp->i_mount;
			
 
				 
			
 
				 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				-	size = xfs_dir2_data_entsize(args->namelen);
			
 
				+	size = xfs_dir3_data_entsize(mp, args->namelen);
			
 
				 	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
			
 
				 	sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				 	holefit = 0;
			
@@ -629,8 +652,8 @@ xfs_dir2_sf_addname_pick(
 
				 		if (!holefit)
			
 
				 			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
			
 
				 		offset = xfs_dir2_sf_get_offset(sfep) +
			
 
				-			 xfs_dir2_data_entsize(sfep->namelen);
			
 
				-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				+			 xfs_dir3_data_entsize(mp, sfep->namelen);
			
 
				+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
			
 
				 	}
			
 
				 	/*
			
 
				 	 * Calculate data bytes used excluding the new entry, if this
			
@@ -684,31 +707,34 @@ xfs_dir2_sf_check(
 
				 	int			offset;		/* data offset */
			
 
				 	xfs_dir2_sf_entry_t	*sfep;		/* shortform dir entry */
			
 
				 	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
			
 
				+	struct xfs_mount	*mp;
			
 
				 
			
 
				 	dp = args->dp;
			
 
				+	mp = dp->i_mount;
			
 
				 
			
 
				 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				-	offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount);
			
 
				+	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
			
 
				 	ino = xfs_dir2_sf_get_parent_ino(sfp);
			
 
				 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
			
 
				 
			
 
				 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				 	     i < sfp->count;
			
 
				-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
			
 
				+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) {
			
 
				 		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
			
 
				-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
			
 
				+		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
			
 
				 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
			
 
				 		offset =
			
 
				 			xfs_dir2_sf_get_offset(sfep) +
			
 
				-			xfs_dir2_data_entsize(sfep->namelen);
			
 
				+			xfs_dir3_data_entsize(mp, sfep->namelen);
			
 
				+		ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) <
			
 
				+							XFS_DIR3_FT_MAX);
			
 
				 	}
			
 
				 	ASSERT(i8count == sfp->i8count);
			
 
				 	ASSERT(XFS_BIG_INUMS || i8count == 0);
			
 
				 	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
			
 
				 	ASSERT(offset +
			
 
				 	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
			
 
				-	       (uint)sizeof(xfs_dir2_block_tail_t) <=
			
 
				-	       dp->i_mount->m_dirblksize);
			
 
				+	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize);
			
 
				 }
			
 
				 #endif	/* DEBUG */
			
 
				 
			
@@ -765,100 +791,6 @@ xfs_dir2_sf_create(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-int						/* error */
			
 
				-xfs_dir2_sf_getdents(
			
 
				-	xfs_inode_t		*dp,		/* incore directory inode */
			
 
				-	struct dir_context	*ctx)
			
 
				-{
			
 
				-	int			i;		/* shortform entry number */
			
 
				-	xfs_mount_t		*mp;		/* filesystem mount point */
			
 
				-	xfs_dir2_dataptr_t	off;		/* current entry's offset */
			
 
				-	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
			
 
				-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
			
 
				-	xfs_dir2_dataptr_t	dot_offset;
			
 
				-	xfs_dir2_dataptr_t	dotdot_offset;
			
 
				-	xfs_ino_t		ino;
			
 
				-
			
 
				-	mp = dp->i_mount;
			
 
				-
			
 
				-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
			
 
				-	/*
			
 
				-	 * Give up if the directory is way too short.
			
 
				-	 */
			
 
				-	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
			
 
				-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				-		return XFS_ERROR(EIO);
			
 
				-	}
			
 
				-
			
 
				-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
			
 
				-	ASSERT(dp->i_df.if_u1.if_data != NULL);
			
 
				-
			
 
				-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				-
			
 
				-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
			
 
				-
			
 
				-	/*
			
 
				-	 * If the block number in the offset is out of range, we're done.
			
 
				-	 */
			
 
				-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
			
 
				-		return 0;
			
 
				-
			
 
				-	/*
			
 
				-	 * Precalculate offsets for . and .. as we will always need them.
			
 
				-	 *
			
 
				-	 * XXX(hch): the second argument is sometimes 0 and sometimes
			
 
				-	 * mp->m_dirdatablk.
			
 
				-	 */
			
 
				-	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-					     XFS_DIR3_DATA_DOT_OFFSET(mp));
			
 
				-	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
			
 
				-
			
 
				-	/*
			
 
				-	 * Put . entry unless we're starting past it.
			
 
				-	 */
			
 
				-	if (ctx->pos <= dot_offset) {
			
 
				-		ctx->pos = dot_offset & 0x7fffffff;
			
 
				-		if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))
			
 
				-			return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Put .. entry unless we're starting past it.
			
 
				-	 */
			
 
				-	if (ctx->pos <= dotdot_offset) {
			
 
				-		ino = xfs_dir2_sf_get_parent_ino(sfp);
			
 
				-		ctx->pos = dotdot_offset & 0x7fffffff;
			
 
				-		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
			
 
				-			return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Loop while there are more entries and put'ing works.
			
 
				-	 */
			
 
				-	sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				-	for (i = 0; i < sfp->count; i++) {
			
 
				-		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-				xfs_dir2_sf_get_offset(sfep));
			
 
				-
			
 
				-		if (ctx->pos > off) {
			
 
				-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
			
 
				-		ctx->pos = off & 0x7fffffff;
			
 
				-		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen,
			
 
				-			    ino, DT_UNKNOWN))
			
 
				-			return 0;
			
 
				-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
			
 
				-	}
			
 
				-
			
 
				-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
			
 
				-			0x7fffffff;
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Lookup an entry in a shortform directory.
			
 
				  * Returns EEXIST if found, ENOENT if not found.
			
@@ -898,6 +830,7 @@ xfs_dir2_sf_lookup(
 
				 	if (args->namelen == 1 && args->name[0] == '.') {
			
 
				 		args->inumber = dp->i_ino;
			
 
				 		args->cmpresult = XFS_CMP_EXACT;
			
 
				+		args->filetype = XFS_DIR3_FT_DIR;
			
 
				 		return XFS_ERROR(EEXIST);
			
 
				 	}
			
 
				 	/*
			
@@ -907,6 +840,7 @@ xfs_dir2_sf_lookup(
 
				 	    args->name[0] == '.' && args->name[1] == '.') {
			
 
				 		args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
			
 
				 		args->cmpresult = XFS_CMP_EXACT;
			
 
				+		args->filetype = XFS_DIR3_FT_DIR;
			
 
				 		return XFS_ERROR(EEXIST);
			
 
				 	}
			
 
				 	/*
			
@@ -914,7 +848,7 @@ xfs_dir2_sf_lookup(
 
				 	 */
			
 
				 	ci_sfep = NULL;
			
 
				 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
			
 
				-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
			
 
				+	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
			
 
				 		/*
			
 
				 		 * Compare name and if it's an exact match, return the inode
			
 
				 		 * number. If it's the first case-insensitive match, store the
			
@@ -924,7 +858,10 @@ xfs_dir2_sf_lookup(
 
				 								sfep->namelen);
			
 
				 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
			
 
				 			args->cmpresult = cmp;
			
 
				-			args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
			
 
				+			args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount,
			
 
				+							     sfp, sfep);
			
 
				+			args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount,
			
 
				+								sfp, sfep);
			
 
				 			if (cmp == XFS_CMP_EXACT)
			
 
				 				return XFS_ERROR(EEXIST);
			
 
				 			ci_sfep = sfep;
			
@@ -980,10 +917,10 @@ xfs_dir2_sf_removename(
 
				 	 * Find the one we're deleting.
			
 
				 	 */
			
 
				 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
			
 
				-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
			
 
				+	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
			
 
				 		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
			
 
				 								XFS_CMP_EXACT) {
			
 
				-			ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
			
 
				+			ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) ==
			
 
				 			       args->inumber);
			
 
				 			break;
			
 
				 		}
			
@@ -997,7 +934,7 @@ xfs_dir2_sf_removename(
 
				 	 * Calculate sizes.
			
 
				 	 */
			
 
				 	byteoff = (int)((char *)sfep - (char *)sfp);
			
 
				-	entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
			
 
				+	entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
			
 
				 	newsize = oldsize - entsize;
			
 
				 	/*
			
 
				 	 * Copy the part if any after the removed entry, sliding it down.
			
@@ -1113,16 +1050,19 @@ xfs_dir2_sf_replace(
 
				 	 * Normal entry, look for the name.
			
 
				 	 */
			
 
				 	else {
			
 
				-		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				-				i < sfp->count;
			
 
				-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
			
 
				+		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
			
 
				+		     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
			
 
				 			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
			
 
				 								XFS_CMP_EXACT) {
			
 
				 #if XFS_BIG_INUMS || defined(DEBUG)
			
 
				-				ino = xfs_dir2_sfe_get_ino(sfp, sfep);
			
 
				+				ino = xfs_dir3_sfe_get_ino(dp->i_mount,
			
 
				+							   sfp, sfep);
			
 
				 				ASSERT(args->inumber != ino);
			
 
				 #endif
			
 
				-				xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
			
 
				+				xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep,
			
 
				+						     args->inumber);
			
 
				+				xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep,
			
 
				+						       args->filetype);
			
 
				 				break;
			
 
				 			}
			
 
				 		}
			
@@ -1189,10 +1129,12 @@ xfs_dir2_sf_toino4(
 
				 	int			oldsize;	/* old inode size */
			
 
				 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
			
 
				 	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
			
 
				+	struct xfs_mount	*mp;
			
 
				 
			
 
				 	trace_xfs_dir2_sf_toino4(args);
			
 
				 
			
 
				 	dp = args->dp;
			
 
				+	mp = dp->i_mount;
			
 
				 
			
 
				 	/*
			
 
				 	 * Copy the old directory to the buffer.
			
@@ -1230,13 +1172,15 @@ xfs_dir2_sf_toino4(
 
				 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
			
 
				 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
			
 
				 	     i < sfp->count;
			
 
				-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
			
 
				-		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
			
 
				+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
			
 
				+		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
			
 
				 		sfep->namelen = oldsfep->namelen;
			
 
				 		sfep->offset = oldsfep->offset;
			
 
				 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
			
 
				-		xfs_dir2_sfe_put_ino(sfp, sfep,
			
 
				-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
			
 
				+		xfs_dir3_sfe_put_ino(mp, sfp, sfep,
			
 
				+			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
			
 
				+		xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
			
 
				+			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
			
 
				 	}
			
 
				 	/*
			
 
				 	 * Clean up the inode.
			
@@ -1264,10 +1208,12 @@ xfs_dir2_sf_toino8(
 
				 	int			oldsize;	/* old inode size */
			
 
				 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
			
 
				 	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
			
 
				+	struct xfs_mount	*mp;
			
 
				 
			
 
				 	trace_xfs_dir2_sf_toino8(args);
			
 
				 
			
 
				 	dp = args->dp;
			
 
				+	mp = dp->i_mount;
			
 
				 
			
 
				 	/*
			
 
				 	 * Copy the old directory to the buffer.
			
@@ -1305,13 +1251,15 @@ xfs_dir2_sf_toino8(
 
				 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
			
 
				 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
			
 
				 	     i < sfp->count;
			
 
				-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
			
 
				-		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
			
 
				+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
			
 
				+		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
			
 
				 		sfep->namelen = oldsfep->namelen;
			
 
				 		sfep->offset = oldsfep->offset;
			
 
				 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
			
 
				-		xfs_dir2_sfe_put_ino(sfp, sfep,
			
 
				-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
			
 
				+		xfs_dir3_sfe_put_ino(mp, sfp, sfep,
			
 
				+			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
			
 
				+		xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
			
 
				+			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
			
 
				 	}
			
 
				 	/*
			
 
				 	 * Clean up the inode.
			
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -16,12 +16,13 @@
 
				  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				  */
			
 
				 #include "xfs.h"
			
 
				-#include "xfs_sb.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_quota.h"
			
 
				-#include "xfs_trans.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
@@ -28,6 +29,7 @@
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_itable.h"
			
@@ -710,10 +712,8 @@ xfs_qm_dqread(
 
				 
			
 
				 	if (flags & XFS_QMOPT_DQALLOC) {
			
 
				 		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
			
 
				-		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
			
 
				-					  XFS_QM_DQALLOC_LOG_RES(mp), 0,
			
 
				-					  XFS_TRANS_PERM_LOG_RES,
			
 
				-					  XFS_WRITE_LOG_COUNT);
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
			
 
				+					  XFS_QM_DQALLOC_SPACE_RES(mp), 0);
			
 
				 		if (error)
			
 
				 			goto error1;
			
 
				 		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
			
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
@@ -43,14 +44,15 @@ static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
 
				 /*
			
 
				  * returns the number of iovecs needed to log the given dquot item.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_qm_dquot_logitem_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				-	/*
			
 
				-	 * we need only two iovecs, one for the format, one for the real thing
			
 
				-	 */
			
 
				-	return 2;
			
 
				+	*nvecs += 2;
			
 
				+	*nbytes += sizeof(struct xfs_dq_logformat) +
			
 
				+		   sizeof(struct xfs_disk_dquot);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -285,11 +287,14 @@ static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
 
				  * We only need 1 iovec for an quotaoff item.  It just logs the
			
 
				  * quotaoff_log_format structure.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_qm_qoff_logitem_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				-	return 1;
			
 
				+	*nvecs += 1;
			
 
				+	*nbytes += sizeof(struct xfs_qoff_logitem);
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -26,7 +26,6 @@
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_error.h"
			
 
				 
			
 
				 #ifdef DEBUG
			
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -21,10 +21,11 @@
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_export.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -147,7 +147,7 @@ xfs_extent_busy_search(
 
				  * extent.  If the overlap covers the beginning, the end, or all of the busy
			
 
				  * extent, the overlapping portion can be made unbusy and used for the
			
 
				  * allocation.  We can't split a busy extent because we can't modify a
			
 
				- * transaction/CIL context busy list, but we can update an entries block
			
 
				+ * transaction/CIL context busy list, but we can update an entry's block
			
 
				  * number or length.
			
 
				  *
			
 
				  * Returns true if the extent can safely be reused, or false if the search
			
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -73,11 +73,22 @@ __xfs_efi_release(
 
				  * We only need 1 iovec for an efi item.  It just logs the efi_log_format
			
 
				  * structure.
			
 
				  */
			
 
				-STATIC uint
			
 
				+static inline int
			
 
				+xfs_efi_item_sizeof(
			
 
				+	struct xfs_efi_log_item *efip)
			
 
				+{
			
 
				+	return sizeof(struct xfs_efi_log_format) +
			
 
				+	       (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				 xfs_efi_item_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				-	return 1;
			
 
				+	*nvecs += 1;
			
 
				+	*nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -93,21 +104,17 @@ xfs_efi_item_format(
 
				 	struct xfs_log_iovec	*log_vector)
			
 
				 {
			
 
				 	struct xfs_efi_log_item	*efip = EFI_ITEM(lip);
			
 
				-	uint			size;
			
 
				 
			
 
				 	ASSERT(atomic_read(&efip->efi_next_extent) ==
			
 
				 				efip->efi_format.efi_nextents);
			
 
				 
			
 
				 	efip->efi_format.efi_type = XFS_LI_EFI;
			
 
				-
			
 
				-	size = sizeof(xfs_efi_log_format_t);
			
 
				-	size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
			
 
				 	efip->efi_format.efi_size = 1;
			
 
				 
			
 
				 	log_vector->i_addr = &efip->efi_format;
			
 
				-	log_vector->i_len = size;
			
 
				+	log_vector->i_len = xfs_efi_item_sizeof(efip);
			
 
				 	log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
			
 
				-	ASSERT(size >= sizeof(xfs_efi_log_format_t));
			
 
				+	ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t));
			
 
				 }
			
 
				 
			
 
				 
			
@@ -333,11 +340,22 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
 
				  * We only need 1 iovec for an efd item.  It just logs the efd_log_format
			
 
				  * structure.
			
 
				  */
			
 
				-STATIC uint
			
 
				+static inline int
			
 
				+xfs_efd_item_sizeof(
			
 
				+	struct xfs_efd_log_item *efdp)
			
 
				+{
			
 
				+	return sizeof(xfs_efd_log_format_t) +
			
 
				+	       (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				 xfs_efd_item_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				-	return 1;
			
 
				+	*nvecs += 1;
			
 
				+	*nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -353,20 +371,16 @@ xfs_efd_item_format(
 
				 	struct xfs_log_iovec	*log_vector)
			
 
				 {
			
 
				 	struct xfs_efd_log_item	*efdp = EFD_ITEM(lip);
			
 
				-	uint			size;
			
 
				 
			
 
				 	ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
			
 
				 
			
 
				 	efdp->efd_format.efd_type = XFS_LI_EFD;
			
 
				-
			
 
				-	size = sizeof(xfs_efd_log_format_t);
			
 
				-	size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
			
 
				 	efdp->efd_format.efd_size = 1;
			
 
				 
			
 
				 	log_vector->i_addr = &efdp->efd_format;
			
 
				-	log_vector->i_len = size;
			
 
				+	log_vector->i_len = xfs_efd_item_sizeof(efdp);
			
 
				 	log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
			
 
				-	ASSERT(size >= sizeof(xfs_efd_log_format_t));
			
 
				+	ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t));
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -18,93 +18,11 @@
 
				 #ifndef	__XFS_EXTFREE_ITEM_H__
			
 
				 #define	__XFS_EXTFREE_ITEM_H__
			
 
				 
			
 
				+/* kernel only EFI/EFD definitions */
			
 
				+
			
 
				 struct xfs_mount;
			
 
				 struct kmem_zone;
			
 
				 
			
 
				-typedef struct xfs_extent {
			
 
				-	xfs_dfsbno_t	ext_start;
			
 
				-	xfs_extlen_t	ext_len;
			
 
				-} xfs_extent_t;
			
 
				-
			
 
				-/*
			
 
				- * Since an xfs_extent_t has types (start:64, len: 32)
			
 
				- * there are different alignments on 32 bit and 64 bit kernels.
			
 
				- * So we provide the different variants for use by a
			
 
				- * conversion routine.
			
 
				- */
			
 
				-
			
 
				-typedef struct xfs_extent_32 {
			
 
				-	__uint64_t	ext_start;
			
 
				-	__uint32_t	ext_len;
			
 
				-} __attribute__((packed)) xfs_extent_32_t;
			
 
				-
			
 
				-typedef struct xfs_extent_64 {
			
 
				-	__uint64_t	ext_start;
			
 
				-	__uint32_t	ext_len;
			
 
				-	__uint32_t	ext_pad;
			
 
				-} xfs_extent_64_t;
			
 
				-
			
 
				-/*
			
 
				- * This is the structure used to lay out an efi log item in the
			
 
				- * log.  The efi_extents field is a variable size array whose
			
 
				- * size is given by efi_nextents.
			
 
				- */
			
 
				-typedef struct xfs_efi_log_format {
			
 
				-	__uint16_t		efi_type;	/* efi log item type */
			
 
				-	__uint16_t		efi_size;	/* size of this item */
			
 
				-	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				-	__uint64_t		efi_id;		/* efi identifier */
			
 
				-	xfs_extent_t		efi_extents[1];	/* array of extents to free */
			
 
				-} xfs_efi_log_format_t;
			
 
				-
			
 
				-typedef struct xfs_efi_log_format_32 {
			
 
				-	__uint16_t		efi_type;	/* efi log item type */
			
 
				-	__uint16_t		efi_size;	/* size of this item */
			
 
				-	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				-	__uint64_t		efi_id;		/* efi identifier */
			
 
				-	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
			
 
				-} __attribute__((packed)) xfs_efi_log_format_32_t;
			
 
				-
			
 
				-typedef struct xfs_efi_log_format_64 {
			
 
				-	__uint16_t		efi_type;	/* efi log item type */
			
 
				-	__uint16_t		efi_size;	/* size of this item */
			
 
				-	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				-	__uint64_t		efi_id;		/* efi identifier */
			
 
				-	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
			
 
				-} xfs_efi_log_format_64_t;
			
 
				-
			
 
				-/*
			
 
				- * This is the structure used to lay out an efd log item in the
			
 
				- * log.  The efd_extents array is a variable size array whose
			
 
				- * size is given by efd_nextents;
			
 
				- */
			
 
				-typedef struct xfs_efd_log_format {
			
 
				-	__uint16_t		efd_type;	/* efd log item type */
			
 
				-	__uint16_t		efd_size;	/* size of this item */
			
 
				-	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				-	xfs_extent_t		efd_extents[1];	/* array of extents freed */
			
 
				-} xfs_efd_log_format_t;
			
 
				-
			
 
				-typedef struct xfs_efd_log_format_32 {
			
 
				-	__uint16_t		efd_type;	/* efd log item type */
			
 
				-	__uint16_t		efd_size;	/* size of this item */
			
 
				-	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				-	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
			
 
				-} __attribute__((packed)) xfs_efd_log_format_32_t;
			
 
				-
			
 
				-typedef struct xfs_efd_log_format_64 {
			
 
				-	__uint16_t		efd_type;	/* efd log item type */
			
 
				-	__uint16_t		efd_size;	/* size of this item */
			
 
				-	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				-	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
			
 
				-} xfs_efd_log_format_64_t;
			
 
				-
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				-
			
 
				 /*
			
 
				  * Max number of extents in fast allocation path.
			
 
				  */
			
@@ -160,6 +78,4 @@ int			xfs_efi_copy_format(xfs_log_iovec_t *buf,
 
				 					    xfs_efi_log_format_t *dst_efi_fmt);
			
 
				 void			xfs_efi_item_free(xfs_efi_log_item_t *);
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				 #endif	/* __XFS_EXTFREE_ITEM_H__ */
			
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -28,10 +28,11 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_error.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_ioctl.h"
			
 
				 #include "xfs_trace.h"
			
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -16,18 +16,18 @@
 
				  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				  */
			
 
				 #include "xfs.h"
			
 
				+#include "xfs_log.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_inum.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_alloc.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_mru_cache.h"
			
 
				 #include "xfs_filestream.h"
			
 
				 #include "xfs_trace.h"
			
@@ -668,8 +668,8 @@ exit:
 
				  */
			
 
				 int
			
 
				 xfs_filestream_new_ag(
			
 
				-	xfs_bmalloca_t	*ap,
			
 
				-	xfs_agnumber_t	*agp)
			
 
				+	struct xfs_bmalloca	*ap,
			
 
				+	xfs_agnumber_t		*agp)
			
 
				 {
			
 
				 	int		flags, err;
			
 
				 	xfs_inode_t	*ip, *pip = NULL;
			
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -18,8 +18,6 @@
 
				 #ifndef __XFS_FILESTREAM_H__
			
 
				 #define __XFS_FILESTREAM_H__
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				-
			
 
				 struct xfs_mount;
			
 
				 struct xfs_inode;
			
 
				 struct xfs_perag;
			
@@ -69,6 +67,4 @@ xfs_inode_is_filestream(
 
				 		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
			
 
				 }
			
 
				 
			
 
				-#endif /* __KERNEL__ */
			
 
				-
			
 
				 #endif /* __XFS_FILESTREAM_H__ */
			
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -0,0 +1,169 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef __XFS_FORMAT_H__
			
 
				+#define __XFS_FORMAT_H__
			
 
				+
			
 
				+/*
			
 
				+ * XFS On Disk Format Definitions
			
 
				+ *
			
 
				+ * This header file defines all the on-disk format definitions for 
			
 
				+ * general XFS objects. Directory and attribute related objects are defined in
			
 
				+ * xfs_da_format.h, while log and log item formats are defined in
			
 
				+ * xfs_log_format.h. Everything else goes here.
			
 
				+ */
			
 
				+
			
 
				+struct xfs_mount;
			
 
				+struct xfs_trans;
			
 
				+struct xfs_inode;
			
 
				+struct xfs_buf;
			
 
				+struct xfs_ifork;
			
 
				+
			
 
				+/*
			
 
				+ * RealTime Device format definitions
			
 
				+ */
			
 
				+
			
 
				+/* Min and max rt extent sizes, specified in bytes */
			
 
				+#define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
			
 
				+#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64kB */
			
 
				+#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4kB */
			
 
				+
			
 
				+#define	XFS_BLOCKSIZE(mp)	((mp)->m_sb.sb_blocksize)
			
 
				+#define	XFS_BLOCKMASK(mp)	((mp)->m_blockmask)
			
 
				+#define	XFS_BLOCKWSIZE(mp)	((mp)->m_blockwsize)
			
 
				+#define	XFS_BLOCKWMASK(mp)	((mp)->m_blockwmask)
			
 
				+
			
 
				+/*
			
 
				+ * RT Summary and bit manipulation macros.
			
 
				+ */
			
 
				+#define	XFS_SUMOFFS(mp,ls,bb)	((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
			
 
				+#define	XFS_SUMOFFSTOBLOCK(mp,s)	\
			
 
				+	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
			
 
				+#define	XFS_SUMPTR(mp,bp,so)	\
			
 
				+	((xfs_suminfo_t *)((bp)->b_addr + \
			
 
				+		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
			
 
				+
			
 
				+#define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
			
 
				+#define	XFS_BLOCKTOBIT(mp,bb)	((bb) << (mp)->m_blkbit_log)
			
 
				+#define	XFS_BITTOWORD(mp,bi)	\
			
 
				+	((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
			
 
				+
			
 
				+#define	XFS_RTMIN(a,b)	((a) < (b) ? (a) : (b))
			
 
				+#define	XFS_RTMAX(a,b)	((a) > (b) ? (a) : (b))
			
 
				+
			
 
				+#define	XFS_RTLOBIT(w)	xfs_lowbit32(w)
			
 
				+#define	XFS_RTHIBIT(w)	xfs_highbit32(w)
			
 
				+
			
 
				+#if XFS_BIG_BLKNOS
			
 
				+#define	XFS_RTBLOCKLOG(b)	xfs_highbit64(b)
			
 
				+#else
			
 
				+#define	XFS_RTBLOCKLOG(b)	xfs_highbit32(b)
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Dquot and dquot block format definitions
			
 
				+ */
			
 
				+#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
			
 
				+#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
			
 
				+
			
 
				+/*
			
 
				+ * This is the main portion of the on-disk representation of quota
			
 
				+ * information for a user. This is the q_core of the xfs_dquot_t that
			
 
				+ * is kept in kernel memory. We pad this with some more expansion room
			
 
				+ * to construct the on disk structure.
			
 
				+ */
			
 
				+typedef struct	xfs_disk_dquot {
			
 
				+	__be16		d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
			
 
				+	__u8		d_version;	/* dquot version */
			
 
				+	__u8		d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
			
 
				+	__be32		d_id;		/* user,project,group id */
			
 
				+	__be64		d_blk_hardlimit;/* absolute limit on disk blks */
			
 
				+	__be64		d_blk_softlimit;/* preferred limit on disk blks */
			
 
				+	__be64		d_ino_hardlimit;/* maximum # allocated inodes */
			
 
				+	__be64		d_ino_softlimit;/* preferred inode limit */
			
 
				+	__be64		d_bcount;	/* disk blocks owned by the user */
			
 
				+	__be64		d_icount;	/* inodes owned by the user */
			
 
				+	__be32		d_itimer;	/* zero if within inode limits; if not,
			
 
				+					   this is when we refuse service */
			
 
				+	__be32		d_btimer;	/* similar to above; for disk blocks */
			
 
				+	__be16		d_iwarns;	/* warnings issued wrt num inodes */
			
 
				+	__be16		d_bwarns;	/* warnings issued wrt disk blocks */
			
 
				+	__be32		d_pad0;		/* 64 bit align */
			
 
				+	__be64		d_rtb_hardlimit;/* absolute limit on realtime blks */
			
 
				+	__be64		d_rtb_softlimit;/* preferred limit on RT disk blks */
			
 
				+	__be64		d_rtbcount;	/* realtime blocks owned */
			
 
				+	__be32		d_rtbtimer;	/* similar to above; for RT disk blocks */
			
 
				+	__be16		d_rtbwarns;	/* warnings issued wrt RT disk blocks */
			
 
				+	__be16		d_pad;
			
 
				+} xfs_disk_dquot_t;
			
 
				+
			
 
				+/*
			
 
				+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
			
 
				+ * carrying the unnecessary padding would be a waste of memory.
			
 
				+ */
			
 
				+typedef struct xfs_dqblk {
			
 
				+	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
			
 
				+	char		  dd_fill[4];	/* filling for posterity */
			
 
				+
			
 
				+	/*
			
 
				+	 * These two are only present on filesystems with the CRC bits set.
			
 
				+	 */
			
 
				+	__be32		  dd_crc;	/* checksum */
			
 
				+	__be64		  dd_lsn;	/* last modification in log */
			
 
				+	uuid_t		  dd_uuid;	/* location information */
			
 
				+} xfs_dqblk_t;
			
 
				+
			
 
				+#define XFS_DQUOT_CRC_OFF	offsetof(struct xfs_dqblk, dd_crc)
			
 
				+
			
 
				+/*
			
 
				+ * Remote symlink format and access functions.
			
 
				+ */
			
 
				+#define XFS_SYMLINK_MAGIC	0x58534c4d	/* XSLM */
			
 
				+
			
 
				+struct xfs_dsymlink_hdr {
			
 
				+	__be32	sl_magic;
			
 
				+	__be32	sl_offset;
			
 
				+	__be32	sl_bytes;
			
 
				+	__be32	sl_crc;
			
 
				+	uuid_t	sl_uuid;
			
 
				+	__be64	sl_owner;
			
 
				+	__be64	sl_blkno;
			
 
				+	__be64	sl_lsn;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * The maximum pathlen is 1024 bytes. Since the minimum file system
			
 
				+ * blocksize is 512 bytes, we can get a max of 3 extents back from
			
 
				+ * bmapi when crc headers are taken into account.
			
 
				+ */
			
 
				+#define XFS_SYMLINK_MAPS 3
			
 
				+
			
 
				+#define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
			
 
				+	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			
 
				+			sizeof(struct xfs_dsymlink_hdr) : 0))
			
 
				+
			
 
				+int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
			
 
				+int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
			
 
				+			uint32_t size, struct xfs_buf *bp);
			
 
				+bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
			
 
				+			uint32_t size, struct xfs_buf *bp);
			
 
				+void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+				 struct xfs_inode *ip, struct xfs_ifork *ifp);
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_symlink_buf_ops;
			
 
				+
			
 
				+#endif /* __XFS_FORMAT_H__ */
			
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -240,7 +240,9 @@ typedef struct xfs_fsop_resblks {
 
				 
			
 
				 
			
 
				 /*
			
 
				- * Minimum and maximum sizes need for growth checks
			
 
				+ * Minimum and maximum sizes needed for growth checks.
			
 
				+ *
			
 
				+ * Block counts are in units of filesystem blocks, not basic blocks.
			
 
				  */
			
 
				 #define XFS_MIN_AG_BLOCKS	64
			
 
				 #define XFS_MIN_LOG_BLOCKS	512ULL
			
@@ -310,6 +312,17 @@ typedef struct xfs_bstat {
 
				 	__u16		bs_aextents;	/* attribute number of extents	*/
			
 
				 } xfs_bstat_t;
			
 
				 
			
 
				+/*
			
 
				+ * Project quota id helpers (previously projid was 16bit only
			
 
				+ * and using two 16bit values to hold new 32bit projid was chosen
			
 
				+ * to retain compatibility with "old" filesystems).
			
 
				+ */
			
 
				+static inline __uint32_t
			
 
				+bstat_get_projid(struct xfs_bstat *bs)
			
 
				+{
			
 
				+	return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * The user-level BulkStat Request interface structure.
			
 
				  */
			
@@ -344,7 +357,7 @@ typedef struct xfs_error_injection {
 
				  * Speculative preallocation trimming.
			
 
				  */
			
 
				 #define XFS_EOFBLOCKS_VERSION		1
			
 
				-struct xfs_eofblocks {
			
 
				+struct xfs_fs_eofblocks {
			
 
				 	__u32		eof_version;
			
 
				 	__u32		eof_flags;
			
 
				 	uid_t		eof_uid;
			
@@ -449,6 +462,21 @@ typedef struct xfs_handle {
 
				 				 - (char *) &(handle))			  \
			
 
				 				 + (handle).ha_fid.fid_len)
			
 
				 
			
 
				+/*
			
 
				+ * Structure passed to XFS_IOC_SWAPEXT
			
 
				+ */
			
 
				+typedef struct xfs_swapext
			
 
				+{
			
 
				+	__int64_t	sx_version;	/* version */
			
 
				+#define XFS_SX_VERSION		0
			
 
				+	__int64_t	sx_fdtarget;	/* fd of target file */
			
 
				+	__int64_t	sx_fdtmp;	/* fd of tmp file */
			
 
				+	xfs_off_t	sx_offset;	/* offset into file */
			
 
				+	xfs_off_t	sx_length;	/* length from offset */
			
 
				+	char		sx_pad[16];	/* pad space, unused */
			
 
				+	xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
			
 
				+} xfs_swapext_t;
			
 
				+
			
 
				 /*
			
 
				  * Flags for going down operation
			
 
				  */
			
@@ -511,8 +539,14 @@ typedef struct xfs_handle {
 
				 #define XFS_IOC_ERROR_INJECTION	     _IOW ('X', 116, struct xfs_error_injection)
			
 
				 #define XFS_IOC_ERROR_CLEARALL	     _IOW ('X', 117, struct xfs_error_injection)
			
 
				 /*	XFS_IOC_ATTRCTL_BY_HANDLE -- deprecated 118	 */
			
 
				+
			
 
				 /*	XFS_IOC_FREEZE		  -- FIFREEZE   119	 */
			
 
				 /*	XFS_IOC_THAW		  -- FITHAW     120	 */
			
 
				+#ifndef FIFREEZE
			
 
				+#define XFS_IOC_FREEZE		     _IOWR('X', 119, int)
			
 
				+#define XFS_IOC_THAW		     _IOWR('X', 120, int)
			
 
				+#endif
			
 
				+
			
 
				 #define XFS_IOC_FSSETDM_BY_HANDLE    _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
			
 
				 #define XFS_IOC_ATTRLIST_BY_HANDLE   _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
			
 
				 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
			
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -203,8 +203,9 @@ xfs_growfs_data_private(
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
			
 
				 	tp->t_flags |= XFS_TRANS_RESERVE;
			
 
				-	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
			
 
				-			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
			
 
				+				  XFS_GROWFS_SPACE_RES(mp), 0);
			
 
				+	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
 
				 	}
			
@@ -739,8 +740,7 @@ xfs_fs_log_dummy(
 
				 	int		error;
			
 
				 
			
 
				 	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
			
 
				-				  XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -39,6 +39,7 @@
 
				 #include "xfs_cksum.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				 #include "xfs_icreate_item.h"
			
 
				+#include "xfs_icache.h"
			
 
				 
			
 
				 
			
 
				 /*
			
@@ -506,7 +507,7 @@ xfs_ialloc_next_ag(
 
				 
			
 
				 /*
			
 
				  * Select an allocation group to look for a free inode in, based on the parent
			
 
				- * inode and then mode.  Return the allocation group buffer.
			
 
				+ * inode and the mode.  Return the allocation group buffer.
			
 
				  */
			
 
				 STATIC xfs_agnumber_t
			
 
				 xfs_ialloc_ag_select(
			
@@ -728,7 +729,7 @@ xfs_dialloc_ag(
 
				 		error = xfs_inobt_get_rec(cur, &rec, &j);
			
 
				 		if (error)
			
 
				 			goto error0;
			
 
				-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			
 
				+		XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
			
 
				 
			
 
				 		if (rec.ir_freecount > 0) {
			
 
				 			/*
			
@@ -1341,7 +1342,7 @@ xfs_imap(
 
				 	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
			
 
				 	int		error;	/* error code */
			
 
				 	int		offset;	/* index of inode in its buffer */
			
 
				-	int		offset_agbno;	/* blks from chunk start to inode */
			
 
				+	xfs_agblock_t	offset_agbno;	/* blks from chunk start to inode */
			
 
				 
			
 
				 	ASSERT(ino != NULLFSINO);
			
 
				 
			
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_types.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_log_priv.h"
			
@@ -31,12 +32,12 @@
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_filestream.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_fsops.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 
			
 
				 #include <linux/kthread.h>
			
 
				 #include <linux/freezer.h>
			
@@ -619,7 +620,7 @@ restart:
 
				 
			
 
				 /*
			
 
				  * Background scanning to trim post-EOF preallocated space. This is queued
			
 
				- * based on the 'background_prealloc_discard_period' tunable (5m by default).
			
 
				+ * based on the 'speculative_prealloc_lifetime' tunable (5m by default).
			
 
				  */
			
 
				 STATIC void
			
 
				 xfs_queue_eofblocks(
			
@@ -1203,15 +1204,15 @@ xfs_inode_match_id(
 
				 	struct xfs_inode	*ip,
			
 
				 	struct xfs_eofblocks	*eofb)
			
 
				 {
			
 
				-	if (eofb->eof_flags & XFS_EOF_FLAGS_UID &&
			
 
				-	    ip->i_d.di_uid != eofb->eof_uid)
			
 
				+	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
			
 
				+	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
			
 
				 		return 0;
			
 
				 
			
 
				-	if (eofb->eof_flags & XFS_EOF_FLAGS_GID &&
			
 
				-	    ip->i_d.di_gid != eofb->eof_gid)
			
 
				+	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
			
 
				+	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
			
 
				 		return 0;
			
 
				 
			
 
				-	if (eofb->eof_flags & XFS_EOF_FLAGS_PRID &&
			
 
				+	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
			
 
				 	    xfs_get_projid(ip) != eofb->eof_prid)
			
 
				 		return 0;
			
 
				 
			
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -21,9 +21,24 @@
 
				 struct xfs_mount;
			
 
				 struct xfs_perag;
			
 
				 
			
 
				+struct xfs_eofblocks {
			
 
				+	__u32		eof_flags;
			
 
				+	kuid_t		eof_uid;
			
 
				+	kgid_t		eof_gid;
			
 
				+	prid_t		eof_prid;
			
 
				+	__u64		eof_min_file_size;
			
 
				+};
			
 
				+
			
 
				 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
			
 
				 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
			
 
				 
			
 
				+/*
			
 
				+ * Flags for xfs_iget()
			
 
				+ */
			
 
				+#define XFS_IGET_CREATE		0x1
			
 
				+#define XFS_IGET_UNTRUSTED	0x2
			
 
				+#define XFS_IGET_DONTCACHE	0x4
			
 
				+
			
 
				 int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
			
 
				 	     uint flags, uint lock_flags, xfs_inode_t **ipp);
			
 
				 
			
@@ -49,4 +64,39 @@ int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
 
				 		int flags, void *args),
			
 
				 	int flags, void *args, int tag);
			
 
				 
			
 
				+static inline int
			
 
				+xfs_fs_eofblocks_from_user(
			
 
				+	struct xfs_fs_eofblocks		*src,
			
 
				+	struct xfs_eofblocks		*dst)
			
 
				+{
			
 
				+	if (src->eof_version != XFS_EOFBLOCKS_VERSION)
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
			
 
				+	    memchr_inv(src->pad64, 0, sizeof(src->pad64)))
			
 
				+		return EINVAL;
			
 
				+
			
 
				+	dst->eof_flags = src->eof_flags;
			
 
				+	dst->eof_prid = src->eof_prid;
			
 
				+	dst->eof_min_file_size = src->eof_min_file_size;
			
 
				+
			
 
				+	dst->eof_uid = INVALID_UID;
			
 
				+	if (src->eof_flags & XFS_EOF_FLAGS_UID) {
			
 
				+		dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
			
 
				+		if (!uid_valid(dst->eof_uid))
			
 
				+			return EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	dst->eof_gid = INVALID_GID;
			
 
				+	if (src->eof_flags & XFS_EOF_FLAGS_GID) {
			
 
				+		dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
			
 
				+		if (!gid_valid(dst->eof_gid))
			
 
				+			return EINVAL;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 #endif
			
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -20,23 +20,11 @@
 
				 #include "xfs_types.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				-#include "xfs_inum.h"
			
 
				 #include "xfs_trans.h"
			
 
				-#include "xfs_buf_item.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_trans_priv.h"
			
 
				-#include "xfs_bmap_btree.h"
			
 
				-#include "xfs_alloc_btree.h"
			
 
				-#include "xfs_ialloc_btree.h"
			
 
				-#include "xfs_attr_sf.h"
			
 
				-#include "xfs_dinode.h"
			
 
				-#include "xfs_inode.h"
			
 
				-#include "xfs_inode_item.h"
			
 
				-#include "xfs_btree.h"
			
 
				-#include "xfs_ialloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_icreate_item.h"
			
 
				 
			
@@ -52,11 +40,14 @@ static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip)
 
				  *
			
 
				  * We only need one iovec for the icreate log structure.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_icreate_item_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				-	return 1;
			
 
				+	*nvecs += 1;
			
 
				+	*nbytes += sizeof(struct xfs_icreate_log);
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_icreate_item.h
+++ b/fs/xfs/xfs_icreate_item.h
@@ -18,24 +18,6 @@
 
				 #ifndef XFS_ICREATE_ITEM_H
			
 
				 #define XFS_ICREATE_ITEM_H	1
			
 
				 
			
 
				-/*
			
 
				- * on disk log item structure
			
 
				- *
			
 
				- * Log recovery assumes the first two entries are the type and size and they fit
			
 
				- * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
			
 
				- * decoding can be done correctly.
			
 
				- */
			
 
				-struct xfs_icreate_log {
			
 
				-	__uint16_t	icl_type;	/* type of log format structure */
			
 
				-	__uint16_t	icl_size;	/* size of log format structure */
			
 
				-	__be32		icl_ag;		/* ag being allocated in */
			
 
				-	__be32		icl_agbno;	/* start block of inode range */
			
 
				-	__be32		icl_count;	/* number of inodes to initialise */
			
 
				-	__be32		icl_isize;	/* size of inodes */
			
 
				-	__be32		icl_length;	/* length of extent to initialise */
			
 
				-	__be32		icl_gen;	/* inode generation number to use */
			
 
				-};
			
 
				-
			
 
				 /* in memory log item structure */
			
 
				 struct xfs_icreate_item {
			
 
				 	struct xfs_log_item	ic_item;
			
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -19,18 +19,23 @@
 
				 
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_inum.h"
			
 
				 #include "xfs_trans.h"
			
 
				+#include "xfs_trans_space.h"
			
 
				 #include "xfs_trans_priv.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
 
				 #include "xfs_attr_sf.h"
			
 
				+#include "xfs_attr.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_buf_item.h"
			
@@ -39,16 +44,15 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_ialloc.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_error.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_filestream.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_cksum.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_symlink.h"
			
 
				 
			
 
				-kmem_zone_t *xfs_ifork_zone;
			
 
				 kmem_zone_t *xfs_inode_zone;
			
 
				 
			
 
				 /*
			
@@ -58,9 +62,6 @@ kmem_zone_t *xfs_inode_zone;
 
				 #define	XFS_ITRUNC_MAX_EXTENTS	2
			
 
				 
			
 
				 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
			
 
				-STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
			
 
				-STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
			
 
				-STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
			
 
				 
			
 
				 /*
			
 
				  * helper function to extract extent size hint from inode
			
@@ -310,623 +311,202 @@ xfs_isilocked(
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-void
			
 
				-__xfs_iflock(
			
 
				-	struct xfs_inode	*ip)
			
 
				-{
			
 
				-	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
			
 
				-	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
			
 
				-
			
 
				-	do {
			
 
				-		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
			
 
				-		if (xfs_isiflocked(ip))
			
 
				-			io_schedule();
			
 
				-	} while (!xfs_iflock_nowait(ip));
			
 
				-
			
 
				-	finish_wait(wq, &wait.wait);
			
 
				-}
			
 
				-
			
 
				 #ifdef DEBUG
			
 
				+int xfs_locked_n;
			
 
				+int xfs_small_retries;
			
 
				+int xfs_middle_retries;
			
 
				+int xfs_lots_retries;
			
 
				+int xfs_lock_delays;
			
 
				+#endif
			
 
				+
			
 
				 /*
			
 
				- * Make sure that the extents in the given memory buffer
			
 
				- * are valid.
			
 
				+ * Bump the subclass so xfs_lock_inodes() acquires each lock with
			
 
				+ * a different value
			
 
				  */
			
 
				-STATIC void
			
 
				-xfs_validate_extents(
			
 
				-	xfs_ifork_t		*ifp,
			
 
				-	int			nrecs,
			
 
				-	xfs_exntfmt_t		fmt)
			
 
				+static inline int
			
 
				+xfs_lock_inumorder(int lock_mode, int subclass)
			
 
				 {
			
 
				-	xfs_bmbt_irec_t		irec;
			
 
				-	xfs_bmbt_rec_host_t	rec;
			
 
				-	int			i;
			
 
				+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
			
 
				+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
			
 
				+	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
			
 
				+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
			
 
				 
			
 
				-	for (i = 0; i < nrecs; i++) {
			
 
				-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			
 
				-		rec.l0 = get_unaligned(&ep->l0);
			
 
				-		rec.l1 = get_unaligned(&ep->l1);
			
 
				-		xfs_bmbt_get_all(&rec, &irec);
			
 
				-		if (fmt == XFS_EXTFMT_NOSTATE)
			
 
				-			ASSERT(irec.br_state == XFS_EXT_NORM);
			
 
				-	}
			
 
				+	return lock_mode;
			
 
				 }
			
 
				-#else /* DEBUG */
			
 
				-#define xfs_validate_extents(ifp, nrecs, fmt)
			
 
				-#endif /* DEBUG */
			
 
				 
			
 
				 /*
			
 
				- * Check that none of the inode's in the buffer have a next
			
 
				- * unlinked field of 0.
			
 
				+ * The following routine will lock n inodes in exclusive mode.
			
 
				+ * We assume the caller calls us with the inodes in i_ino order.
			
 
				+ *
			
 
				+ * We need to detect deadlock where an inode that we lock
			
 
				+ * is in the AIL and we start waiting for another inode that is locked
			
 
				+ * by a thread in a long running transaction (such as truncate). This can
			
 
				+ * result in deadlock since the long running trans might need to wait
			
 
				+ * for the inode we just locked in order to push the tail and free space
			
 
				+ * in the log.
			
 
				  */
			
 
				-#if defined(DEBUG)
			
 
				 void
			
 
				-xfs_inobp_check(
			
 
				-	xfs_mount_t	*mp,
			
 
				-	xfs_buf_t	*bp)
			
 
				-{
			
 
				-	int		i;
			
 
				-	int		j;
			
 
				-	xfs_dinode_t	*dip;
			
 
				-
			
 
				-	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
			
 
				-
			
 
				-	for (i = 0; i < j; i++) {
			
 
				-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
			
 
				-					i * mp->m_sb.sb_inodesize);
			
 
				-		if (!dip->di_next_unlinked)  {
			
 
				-			xfs_alert(mp,
			
 
				-	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
			
 
				-				bp);
			
 
				-			ASSERT(dip->di_next_unlinked);
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-static void
			
 
				-xfs_inode_buf_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				+xfs_lock_inodes(
			
 
				+	xfs_inode_t	**ips,
			
 
				+	int		inodes,
			
 
				+	uint		lock_mode)
			
 
				 {
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-	int		i;
			
 
				-	int		ni;
			
 
				-
			
 
				-	/*
			
 
				-	 * Validate the magic number and version of every inode in the buffer
			
 
				-	 */
			
 
				-	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
			
 
				-	for (i = 0; i < ni; i++) {
			
 
				-		int		di_ok;
			
 
				-		xfs_dinode_t	*dip;
			
 
				-
			
 
				-		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
			
 
				-					(i << mp->m_sb.sb_inodelog));
			
 
				-		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
			
 
				-			    XFS_DINODE_GOOD_VERSION(dip->di_version);
			
 
				-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
			
 
				-						XFS_ERRTAG_ITOBP_INOTOBP,
			
 
				-						XFS_RANDOM_ITOBP_INOTOBP))) {
			
 
				-			xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
			
 
				-					     mp, dip);
			
 
				-#ifdef DEBUG
			
 
				-			xfs_emerg(mp,
			
 
				-				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
			
 
				-				(unsigned long long)bp->b_bn, i,
			
 
				-				be16_to_cpu(dip->di_magic));
			
 
				-			ASSERT(0);
			
 
				-#endif
			
 
				-		}
			
 
				-	}
			
 
				-	xfs_inobp_check(mp, bp);
			
 
				-}
			
 
				+	int		attempts = 0, i, j, try_lock;
			
 
				+	xfs_log_item_t	*lp;
			
 
				 
			
 
				+	ASSERT(ips && (inodes >= 2)); /* we need at least two */
			
 
				 
			
 
				-static void
			
 
				-xfs_inode_buf_read_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	xfs_inode_buf_verify(bp);
			
 
				-}
			
 
				-
			
 
				-static void
			
 
				-xfs_inode_buf_write_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	xfs_inode_buf_verify(bp);
			
 
				-}
			
 
				+	try_lock = 0;
			
 
				+	i = 0;
			
 
				 
			
 
				-const struct xfs_buf_ops xfs_inode_buf_ops = {
			
 
				-	.verify_read = xfs_inode_buf_read_verify,
			
 
				-	.verify_write = xfs_inode_buf_write_verify,
			
 
				-};
			
 
				+again:
			
 
				+	for (; i < inodes; i++) {
			
 
				+		ASSERT(ips[i]);
			
 
				 
			
 
				+		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			
 
				+			continue;
			
 
				 
			
 
				-/*
			
 
				- * This routine is called to map an inode to the buffer containing the on-disk
			
 
				- * version of the inode.  It returns a pointer to the buffer containing the
			
 
				- * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
			
 
				- * pointer to the on-disk inode within that buffer.
			
 
				- *
			
 
				- * If a non-zero error is returned, then the contents of bpp and dipp are
			
 
				- * undefined.
			
 
				- */
			
 
				-int
			
 
				-xfs_imap_to_bp(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	struct xfs_trans	*tp,
			
 
				-	struct xfs_imap		*imap,
			
 
				-	struct xfs_dinode       **dipp,
			
 
				-	struct xfs_buf		**bpp,
			
 
				-	uint			buf_flags,
			
 
				-	uint			iget_flags)
			
 
				-{
			
 
				-	struct xfs_buf		*bp;
			
 
				-	int			error;
			
 
				+		/*
			
 
				+		 * If try_lock is not set yet, make sure all locked inodes
			
 
				+		 * are not in the AIL.
			
 
				+		 * If any are, set try_lock to be used later.
			
 
				+		 */
			
 
				 
			
 
				-	buf_flags |= XBF_UNMAPPED;
			
 
				-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
			
 
				-				   (int)imap->im_len, buf_flags, &bp,
			
 
				-				   &xfs_inode_buf_ops);
			
 
				-	if (error) {
			
 
				-		if (error == EAGAIN) {
			
 
				-			ASSERT(buf_flags & XBF_TRYLOCK);
			
 
				-			return error;
			
 
				+		if (!try_lock) {
			
 
				+			for (j = (i - 1); j >= 0 && !try_lock; j--) {
			
 
				+				lp = (xfs_log_item_t *)ips[j]->i_itemp;
			
 
				+				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
			
 
				+					try_lock++;
			
 
				+				}
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				-		if (error == EFSCORRUPTED &&
			
 
				-		    (iget_flags & XFS_IGET_UNTRUSTED))
			
 
				-			return XFS_ERROR(EINVAL);
			
 
				-
			
 
				-		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
			
 
				-			__func__, error);
			
 
				-		return error;
			
 
				-	}
			
 
				-
			
 
				-	*bpp = bp;
			
 
				-	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Move inode type and inode format specific information from the
			
 
				- * on-disk inode to the in-core inode.  For fifos, devs, and sockets
			
 
				- * this means set if_rdev to the proper value.  For files, directories,
			
 
				- * and symlinks this means to bring in the in-line data or extent
			
 
				- * pointers.  For a file in B-tree format, only the root is immediately
			
 
				- * brought in-core.  The rest will be in-lined in if_extents when it
			
 
				- * is first referenced (see xfs_iread_extents()).
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_iformat(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	xfs_dinode_t		*dip)
			
 
				-{
			
 
				-	xfs_attr_shortform_t	*atp;
			
 
				-	int			size;
			
 
				-	int			error = 0;
			
 
				-	xfs_fsize_t             di_size;
			
 
				-
			
 
				-	if (unlikely(be32_to_cpu(dip->di_nextents) +
			
 
				-		     be16_to_cpu(dip->di_anextents) >
			
 
				-		     be64_to_cpu(dip->di_nblocks))) {
			
 
				-		xfs_warn(ip->i_mount,
			
 
				-			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
			
 
				-			(unsigned long long)ip->i_ino,
			
 
				-			(int)(be32_to_cpu(dip->di_nextents) +
			
 
				-			      be16_to_cpu(dip->di_anextents)),
			
 
				-			(unsigned long long)
			
 
				-				be64_to_cpu(dip->di_nblocks));
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
			
 
				-				     ip->i_mount, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
			
 
				-		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
			
 
				-			(unsigned long long)ip->i_ino,
			
 
				-			dip->di_forkoff);
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
			
 
				-				     ip->i_mount, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
			
 
				-		     !ip->i_mount->m_rtdev_targp)) {
			
 
				-		xfs_warn(ip->i_mount,
			
 
				-			"corrupt dinode %Lu, has realtime flag set.",
			
 
				-			ip->i_ino);
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
			
 
				-				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	switch (ip->i_d.di_mode & S_IFMT) {
			
 
				-	case S_IFIFO:
			
 
				-	case S_IFCHR:
			
 
				-	case S_IFBLK:
			
 
				-	case S_IFSOCK:
			
 
				-		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
			
 
				-			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
			
 
				-					      ip->i_mount, dip);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				-		}
			
 
				-		ip->i_d.di_size = 0;
			
 
				-		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
			
 
				-		break;
			
 
				+		/*
			
 
				+		 * If any of the previous locks we have locked is in the AIL,
			
 
				+		 * we must TRY to get the second and subsequent locks. If
			
 
				+		 * we can't get any, we must release all we have
			
 
				+		 * and try again.
			
 
				+		 */
			
 
				 
			
 
				-	case S_IFREG:
			
 
				-	case S_IFLNK:
			
 
				-	case S_IFDIR:
			
 
				-		switch (dip->di_format) {
			
 
				-		case XFS_DINODE_FMT_LOCAL:
			
 
				+		if (try_lock) {
			
 
				+			/* try_lock must be 0 if i is 0. */
			
 
				 			/*
			
 
				-			 * no local regular files yet
			
 
				+			 * try_lock means we have an inode locked
			
 
				+			 * that is in the AIL.
			
 
				 			 */
			
 
				-			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
			
 
				-				xfs_warn(ip->i_mount,
			
 
				-			"corrupt inode %Lu (local format for regular file).",
			
 
				-					(unsigned long long) ip->i_ino);
			
 
				-				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
			
 
				-						     XFS_ERRLEVEL_LOW,
			
 
				-						     ip->i_mount, dip);
			
 
				-				return XFS_ERROR(EFSCORRUPTED);
			
 
				-			}
			
 
				+			ASSERT(i != 0);
			
 
				+			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
			
 
				+				attempts++;
			
 
				+
			
 
				+				/*
			
 
				+				 * Unlock all previous guys and try again.
			
 
				+				 * xfs_iunlock will try to push the tail
			
 
				+				 * if the inode is in the AIL.
			
 
				+				 */
			
 
				+
			
 
				+				for(j = i - 1; j >= 0; j--) {
			
 
				+
			
 
				+					/*
			
 
				+					 * Check to see if we've already
			
 
				+					 * unlocked this one.
			
 
				+					 * Not the first one going back,
			
 
				+					 * and the inode ptr is the same.
			
 
				+					 */
			
 
				+					if ((j != (i - 1)) && ips[j] ==
			
 
				+								ips[j+1])
			
 
				+						continue;
			
 
				+
			
 
				+					xfs_iunlock(ips[j], lock_mode);
			
 
				+				}
			
 
				 
			
 
				-			di_size = be64_to_cpu(dip->di_size);
			
 
				-			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
			
 
				-				xfs_warn(ip->i_mount,
			
 
				-			"corrupt inode %Lu (bad size %Ld for local inode).",
			
 
				-					(unsigned long long) ip->i_ino,
			
 
				-					(long long) di_size);
			
 
				-				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
			
 
				-						     XFS_ERRLEVEL_LOW,
			
 
				-						     ip->i_mount, dip);
			
 
				-				return XFS_ERROR(EFSCORRUPTED);
			
 
				+				if ((attempts % 5) == 0) {
			
 
				+					delay(1); /* Don't just spin the CPU */
			
 
				+#ifdef DEBUG
			
 
				+					xfs_lock_delays++;
			
 
				+#endif
			
 
				+				}
			
 
				+				i = 0;
			
 
				+				try_lock = 0;
			
 
				+				goto again;
			
 
				 			}
			
 
				-
			
 
				-			size = (int)di_size;
			
 
				-			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
			
 
				-			break;
			
 
				-		case XFS_DINODE_FMT_EXTENTS:
			
 
				-			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
			
 
				-			break;
			
 
				-		case XFS_DINODE_FMT_BTREE:
			
 
				-			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
			
 
				-			break;
			
 
				-		default:
			
 
				-			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
			
 
				-					 ip->i_mount);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				+		} else {
			
 
				+			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
			
 
				 		}
			
 
				-		break;
			
 
				-
			
 
				-	default:
			
 
				-		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-	if (error) {
			
 
				-		return error;
			
 
				 	}
			
 
				-	if (!XFS_DFORK_Q(dip))
			
 
				-		return 0;
			
 
				-
			
 
				-	ASSERT(ip->i_afp == NULL);
			
 
				-	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
			
 
				-
			
 
				-	switch (dip->di_aformat) {
			
 
				-	case XFS_DINODE_FMT_LOCAL:
			
 
				-		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
			
 
				-		size = be16_to_cpu(atp->hdr.totsize);
			
 
				-
			
 
				-		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
			
 
				-			xfs_warn(ip->i_mount,
			
 
				-				"corrupt inode %Lu (bad attr fork size %Ld).",
			
 
				-				(unsigned long long) ip->i_ino,
			
 
				-				(long long) size);
			
 
				-			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
			
 
				-					     XFS_ERRLEVEL_LOW,
			
 
				-					     ip->i_mount, dip);
			
 
				-			return XFS_ERROR(EFSCORRUPTED);
			
 
				-		}
			
 
				 
			
 
				-		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
			
 
				-		break;
			
 
				-	case XFS_DINODE_FMT_EXTENTS:
			
 
				-		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
			
 
				-		break;
			
 
				-	case XFS_DINODE_FMT_BTREE:
			
 
				-		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
			
 
				-		break;
			
 
				-	default:
			
 
				-		error = XFS_ERROR(EFSCORRUPTED);
			
 
				-		break;
			
 
				-	}
			
 
				-	if (error) {
			
 
				-		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
			
 
				-		ip->i_afp = NULL;
			
 
				-		xfs_idestroy_fork(ip, XFS_DATA_FORK);
			
 
				+#ifdef DEBUG
			
 
				+	if (attempts) {
			
 
				+		if (attempts < 5) xfs_small_retries++;
			
 
				+		else if (attempts < 100) xfs_middle_retries++;
			
 
				+		else xfs_lots_retries++;
			
 
				+	} else {
			
 
				+		xfs_locked_n++;
			
 
				 	}
			
 
				-	return error;
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * The file is in-lined in the on-disk inode.
			
 
				- * If it fits into if_inline_data, then copy
			
 
				- * it there, otherwise allocate a buffer for it
			
 
				- * and copy the data there.  Either way, set
			
 
				- * if_data to point at the data.
			
 
				- * If we allocate a buffer for the data, make
			
 
				- * sure that its size is a multiple of 4 and
			
 
				- * record the real size in i_real_bytes.
			
 
				+ * xfs_lock_two_inodes() can only be used to lock one type of lock
			
 
				+ * at a time - the iolock or the ilock, but not both at once. If
			
 
				+ * we lock both at once, lockdep will report false positives saying
			
 
				+ * we have violated locking orders.
			
 
				  */
			
 
				-STATIC int
			
 
				-xfs_iformat_local(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	xfs_dinode_t	*dip,
			
 
				-	int		whichfork,
			
 
				-	int		size)
			
 
				+void
			
 
				+xfs_lock_two_inodes(
			
 
				+	xfs_inode_t		*ip0,
			
 
				+	xfs_inode_t		*ip1,
			
 
				+	uint			lock_mode)
			
 
				 {
			
 
				-	xfs_ifork_t	*ifp;
			
 
				-	int		real_size;
			
 
				-
			
 
				-	/*
			
 
				-	 * If the size is unreasonable, then something
			
 
				-	 * is wrong and we just bail out rather than crash in
			
 
				-	 * kmem_alloc() or memcpy() below.
			
 
				-	 */
			
 
				-	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
			
 
				-		xfs_warn(ip->i_mount,
			
 
				-	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
			
 
				-			(unsigned long long) ip->i_ino, size,
			
 
				-			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
			
 
				-				     ip->i_mount, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	real_size = 0;
			
 
				-	if (size == 0)
			
 
				-		ifp->if_u1.if_data = NULL;
			
 
				-	else if (size <= sizeof(ifp->if_u2.if_inline_data))
			
 
				-		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
			
 
				-	else {
			
 
				-		real_size = roundup(size, 4);
			
 
				-		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
			
 
				-	}
			
 
				-	ifp->if_bytes = size;
			
 
				-	ifp->if_real_bytes = real_size;
			
 
				-	if (size)
			
 
				-		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
			
 
				-	ifp->if_flags &= ~XFS_IFEXTENTS;
			
 
				-	ifp->if_flags |= XFS_IFINLINE;
			
 
				-	return 0;
			
 
				-}
			
 
				+	xfs_inode_t		*temp;
			
 
				+	int			attempts = 0;
			
 
				+	xfs_log_item_t		*lp;
			
 
				 
			
 
				-/*
			
 
				- * The file consists of a set of extents all
			
 
				- * of which fit into the on-disk inode.
			
 
				- * If there are few enough extents to fit into
			
 
				- * the if_inline_ext, then copy them there.
			
 
				- * Otherwise allocate a buffer for them and copy
			
 
				- * them into it.  Either way, set if_extents
			
 
				- * to point at the extents.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_iformat_extents(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	xfs_dinode_t	*dip,
			
 
				-	int		whichfork)
			
 
				-{
			
 
				-	xfs_bmbt_rec_t	*dp;
			
 
				-	xfs_ifork_t	*ifp;
			
 
				-	int		nex;
			
 
				-	int		size;
			
 
				-	int		i;
			
 
				-
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
			
 
				-	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-
			
 
				-	/*
			
 
				-	 * If the number of extents is unreasonable, then something
			
 
				-	 * is wrong and we just bail out rather than crash in
			
 
				-	 * kmem_alloc() or memcpy() below.
			
 
				-	 */
			
 
				-	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
			
 
				-		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
			
 
				-			(unsigned long long) ip->i_ino, nex);
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
			
 
				-				     ip->i_mount, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	ifp->if_real_bytes = 0;
			
 
				-	if (nex == 0)
			
 
				-		ifp->if_u1.if_extents = NULL;
			
 
				-	else if (nex <= XFS_INLINE_EXTS)
			
 
				-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
			
 
				-	else
			
 
				-		xfs_iext_add(ifp, 0, nex);
			
 
				-
			
 
				-	ifp->if_bytes = size;
			
 
				-	if (size) {
			
 
				-		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
			
 
				-		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
			
 
				-		for (i = 0; i < nex; i++, dp++) {
			
 
				-			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			
 
				-			ep->l0 = get_unaligned_be64(&dp->l0);
			
 
				-			ep->l1 = get_unaligned_be64(&dp->l1);
			
 
				-		}
			
 
				-		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
			
 
				-		if (whichfork != XFS_DATA_FORK ||
			
 
				-			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
			
 
				-				if (unlikely(xfs_check_nostate_extents(
			
 
				-				    ifp, 0, nex))) {
			
 
				-					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
			
 
				-							 XFS_ERRLEVEL_LOW,
			
 
				-							 ip->i_mount);
			
 
				-					return XFS_ERROR(EFSCORRUPTED);
			
 
				-				}
			
 
				-	}
			
 
				-	ifp->if_flags |= XFS_IFEXTENTS;
			
 
				-	return 0;
			
 
				-}
			
 
				+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
			
 
				+		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
			
 
				+	ASSERT(ip0->i_ino != ip1->i_ino);
			
 
				 
			
 
				-/*
			
 
				- * The file has too many extents to fit into
			
 
				- * the inode, so they are in B-tree format.
			
 
				- * Allocate a buffer for the root of the B-tree
			
 
				- * and copy the root into it.  The i_extents
			
 
				- * field will remain NULL until all of the
			
 
				- * extents are read in (when they are needed).
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_iformat_btree(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	xfs_dinode_t		*dip,
			
 
				-	int			whichfork)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = ip->i_mount;
			
 
				-	xfs_bmdr_block_t	*dfp;
			
 
				-	xfs_ifork_t		*ifp;
			
 
				-	/* REFERENCED */
			
 
				-	int			nrecs;
			
 
				-	int			size;
			
 
				-
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
			
 
				-	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
			
 
				-	nrecs = be16_to_cpu(dfp->bb_numrecs);
			
 
				-
			
 
				-	/*
			
 
				-	 * blow out if -- fork has less extents than can fit in
			
 
				-	 * fork (fork shouldn't be a btree format), root btree
			
 
				-	 * block has more records than can fit into the fork,
			
 
				-	 * or the number of extents is greater than the number of
			
 
				-	 * blocks.
			
 
				-	 */
			
 
				-	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
			
 
				-					XFS_IFORK_MAXEXT(ip, whichfork) ||
			
 
				-		     XFS_BMDR_SPACE_CALC(nrecs) >
			
 
				-					XFS_DFORK_SIZE(dip, mp, whichfork) ||
			
 
				-		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
			
 
				-		xfs_warn(mp, "corrupt inode %Lu (btree).",
			
 
				-					(unsigned long long) ip->i_ino);
			
 
				-		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
			
 
				-					 mp, dip);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	ifp->if_broot_bytes = size;
			
 
				-	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
			
 
				-	ASSERT(ifp->if_broot != NULL);
			
 
				-	/*
			
 
				-	 * Copy and convert from the on-disk structure
			
 
				-	 * to the in-memory structure.
			
 
				-	 */
			
 
				-	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			
 
				-			 ifp->if_broot, size);
			
 
				-	ifp->if_flags &= ~XFS_IFEXTENTS;
			
 
				-	ifp->if_flags |= XFS_IFBROOT;
			
 
				+	if (ip0->i_ino > ip1->i_ino) {
			
 
				+		temp = ip0;
			
 
				+		ip0 = ip1;
			
 
				+		ip1 = temp;
			
 
				+	}
			
 
				 
			
 
				-	return 0;
			
 
				-}
			
 
				+ again:
			
 
				+	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
			
 
				 
			
 
				-STATIC void
			
 
				-xfs_dinode_from_disk(
			
 
				-	xfs_icdinode_t		*to,
			
 
				-	xfs_dinode_t		*from)
			
 
				-{
			
 
				-	to->di_magic = be16_to_cpu(from->di_magic);
			
 
				-	to->di_mode = be16_to_cpu(from->di_mode);
			
 
				-	to->di_version = from ->di_version;
			
 
				-	to->di_format = from->di_format;
			
 
				-	to->di_onlink = be16_to_cpu(from->di_onlink);
			
 
				-	to->di_uid = be32_to_cpu(from->di_uid);
			
 
				-	to->di_gid = be32_to_cpu(from->di_gid);
			
 
				-	to->di_nlink = be32_to_cpu(from->di_nlink);
			
 
				-	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
			
 
				-	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
			
 
				-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
			
 
				-	to->di_flushiter = be16_to_cpu(from->di_flushiter);
			
 
				-	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
			
 
				-	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
			
 
				-	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
			
 
				-	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
			
 
				-	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
			
 
				-	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
			
 
				-	to->di_size = be64_to_cpu(from->di_size);
			
 
				-	to->di_nblocks = be64_to_cpu(from->di_nblocks);
			
 
				-	to->di_extsize = be32_to_cpu(from->di_extsize);
			
 
				-	to->di_nextents = be32_to_cpu(from->di_nextents);
			
 
				-	to->di_anextents = be16_to_cpu(from->di_anextents);
			
 
				-	to->di_forkoff = from->di_forkoff;
			
 
				-	to->di_aformat	= from->di_aformat;
			
 
				-	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
			
 
				-	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
			
 
				-	to->di_flags	= be16_to_cpu(from->di_flags);
			
 
				-	to->di_gen	= be32_to_cpu(from->di_gen);
			
 
				-
			
 
				-	if (to->di_version == 3) {
			
 
				-		to->di_changecount = be64_to_cpu(from->di_changecount);
			
 
				-		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
			
 
				-		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
			
 
				-		to->di_flags2 = be64_to_cpu(from->di_flags2);
			
 
				-		to->di_ino = be64_to_cpu(from->di_ino);
			
 
				-		to->di_lsn = be64_to_cpu(from->di_lsn);
			
 
				-		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				-		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				+	/*
			
 
				+	 * If the first lock we have locked is in the AIL, we must TRY to get
			
 
				+	 * the second lock. If we can't get it, we must release the first one
			
 
				+	 * and try again.
			
 
				+	 */
			
 
				+	lp = (xfs_log_item_t *)ip0->i_itemp;
			
 
				+	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
			
 
				+		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			
 
				+			xfs_iunlock(ip0, lock_mode);
			
 
				+			if ((++attempts % 5) == 0)
			
 
				+				delay(1); /* Don't just spin the CPU */
			
 
				+			goto again;
			
 
				+		}
			
 
				+	} else {
			
 
				+		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				+
			
 
				 void
			
 
				-xfs_dinode_to_disk(
			
 
				-	xfs_dinode_t		*to,
			
 
				-	xfs_icdinode_t		*from)
			
 
				+__xfs_iflock(
			
 
				+	struct xfs_inode	*ip)
			
 
				 {
			
 
				-	to->di_magic = cpu_to_be16(from->di_magic);
			
 
				-	to->di_mode = cpu_to_be16(from->di_mode);
			
 
				-	to->di_version = from ->di_version;
			
 
				-	to->di_format = from->di_format;
			
 
				-	to->di_onlink = cpu_to_be16(from->di_onlink);
			
 
				-	to->di_uid = cpu_to_be32(from->di_uid);
			
 
				-	to->di_gid = cpu_to_be32(from->di_gid);
			
 
				-	to->di_nlink = cpu_to_be32(from->di_nlink);
			
 
				-	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
			
 
				-	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
			
 
				-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
			
 
				-	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
			
 
				-	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
			
 
				-	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
			
 
				-	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
			
 
				-	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
			
 
				-	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
			
 
				-	to->di_size = cpu_to_be64(from->di_size);
			
 
				-	to->di_nblocks = cpu_to_be64(from->di_nblocks);
			
 
				-	to->di_extsize = cpu_to_be32(from->di_extsize);
			
 
				-	to->di_nextents = cpu_to_be32(from->di_nextents);
			
 
				-	to->di_anextents = cpu_to_be16(from->di_anextents);
			
 
				-	to->di_forkoff = from->di_forkoff;
			
 
				-	to->di_aformat = from->di_aformat;
			
 
				-	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
			
 
				-	to->di_dmstate = cpu_to_be16(from->di_dmstate);
			
 
				-	to->di_flags = cpu_to_be16(from->di_flags);
			
 
				-	to->di_gen = cpu_to_be32(from->di_gen);
			
 
				-
			
 
				-	if (from->di_version == 3) {
			
 
				-		to->di_changecount = cpu_to_be64(from->di_changecount);
			
 
				-		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
			
 
				-		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
			
 
				-		to->di_flags2 = cpu_to_be64(from->di_flags2);
			
 
				-		to->di_ino = cpu_to_be64(from->di_ino);
			
 
				-		to->di_lsn = cpu_to_be64(from->di_lsn);
			
 
				-		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				-		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				-		to->di_flushiter = 0;
			
 
				-	} else {
			
 
				-		to->di_flushiter = cpu_to_be16(from->di_flushiter);
			
 
				-	}
			
 
				+	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
			
 
				+	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
			
 
				+
			
 
				+	do {
			
 
				+		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
			
 
				+		if (xfs_isiflocked(ip))
			
 
				+			io_schedule();
			
 
				+	} while (!xfs_iflock_nowait(ip));
			
 
				+
			
 
				+	finish_wait(wq, &wait.wait);
			
 
				 }
			
 
				 
			
 
				 STATIC uint
			
@@ -987,234 +567,49 @@ xfs_dic2xflags(
 
				 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
			
 
				 }
			
 
				 
			
 
				-static bool
			
 
				-xfs_dinode_verify(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	struct xfs_inode	*ip,
			
 
				-	struct xfs_dinode	*dip)
			
 
				-{
			
 
				-	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
			
 
				-		return false;
			
 
				-
			
 
				-	/* only version 3 or greater inodes are extensively verified here */
			
 
				-	if (dip->di_version < 3)
			
 
				-		return true;
			
 
				-
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return false;
			
 
				-	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				-			      offsetof(struct xfs_dinode, di_crc)))
			
 
				-		return false;
			
 
				-	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
			
 
				-		return false;
			
 
				-	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
			
 
				-		return false;
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-void
			
 
				-xfs_dinode_calc_crc(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	struct xfs_dinode	*dip)
			
 
				-{
			
 
				-	__uint32_t		crc;
			
 
				-
			
 
				-	if (dip->di_version < 3)
			
 
				-		return;
			
 
				-
			
 
				-	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
			
 
				-	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				-			      offsetof(struct xfs_dinode, di_crc));
			
 
				-	dip->di_crc = xfs_end_cksum(crc);
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				- * Read the disk inode attributes into the in-core inode structure.
			
 
				- *
			
 
				- * For version 5 superblocks, if we are initialising a new inode and we are not
			
 
				- * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
			
 
				- * inode core with a random generation number. If we are keeping inodes around,
			
 
				- * we need to read the inode cluster to get the existing generation number off
			
 
				- * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
			
 
				- * format) then log recovery is dependent on the di_flushiter field being
			
 
				- * initialised from the current on-disk value and hence we must also read the
			
 
				- * inode off disk.
			
 
				+ * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
			
 
				+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
			
 
				+ * ci_name->name will point to a the actual name (caller must free) or
			
 
				+ * will be set to NULL if an exact match is found.
			
 
				  */
			
 
				 int
			
 
				-xfs_iread(
			
 
				-	xfs_mount_t	*mp,
			
 
				-	xfs_trans_t	*tp,
			
 
				-	xfs_inode_t	*ip,
			
 
				-	uint		iget_flags)
			
 
				+xfs_lookup(
			
 
				+	xfs_inode_t		*dp,
			
 
				+	struct xfs_name		*name,
			
 
				+	xfs_inode_t		**ipp,
			
 
				+	struct xfs_name		*ci_name)
			
 
				 {
			
 
				-	xfs_buf_t	*bp;
			
 
				-	xfs_dinode_t	*dip;
			
 
				-	int		error;
			
 
				-
			
 
				-	/*
			
 
				-	 * Fill in the location information in the in-core inode.
			
 
				-	 */
			
 
				-	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-
			
 
				-	/* shortcut IO on inode allocation if possible */
			
 
				-	if ((iget_flags & XFS_IGET_CREATE) &&
			
 
				-	    xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				-	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
			
 
				-		/* initialise the on-disk inode core */
			
 
				-		memset(&ip->i_d, 0, sizeof(ip->i_d));
			
 
				-		ip->i_d.di_magic = XFS_DINODE_MAGIC;
			
 
				-		ip->i_d.di_gen = prandom_u32();
			
 
				-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				-			ip->i_d.di_version = 3;
			
 
				-			ip->i_d.di_ino = ip->i_ino;
			
 
				-			uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
			
 
				-		} else
			
 
				-			ip->i_d.di_version = 2;
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Get pointers to the on-disk inode and the buffer containing it.
			
 
				-	 */
			
 
				-	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
			
 
				-	if (error)
			
 
				-		return error;
			
 
				-
			
 
				-	/* even unallocated inodes are verified */
			
 
				-	if (!xfs_dinode_verify(mp, ip, dip)) {
			
 
				-		xfs_alert(mp, "%s: validation failed for inode %lld failed",
			
 
				-				__func__, ip->i_ino);
			
 
				+	xfs_ino_t		inum;
			
 
				+	int			error;
			
 
				+	uint			lock_mode;
			
 
				 
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
			
 
				-		error = XFS_ERROR(EFSCORRUPTED);
			
 
				-		goto out_brelse;
			
 
				-	}
			
 
				+	trace_xfs_lookup(dp, name);
			
 
				 
			
 
				-	/*
			
 
				-	 * If the on-disk inode is already linked to a directory
			
 
				-	 * entry, copy all of the inode into the in-core inode.
			
 
				-	 * xfs_iformat() handles copying in the inode format
			
 
				-	 * specific information.
			
 
				-	 * Otherwise, just get the truly permanent information.
			
 
				-	 */
			
 
				-	if (dip->di_mode) {
			
 
				-		xfs_dinode_from_disk(&ip->i_d, dip);
			
 
				-		error = xfs_iformat(ip, dip);
			
 
				-		if (error)  {
			
 
				-#ifdef DEBUG
			
 
				-			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
			
 
				-				__func__, error);
			
 
				-#endif /* DEBUG */
			
 
				-			goto out_brelse;
			
 
				-		}
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * Partial initialisation of the in-core inode. Just the bits
			
 
				-		 * that xfs_ialloc won't overwrite or relies on being correct.
			
 
				-		 */
			
 
				-		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
			
 
				-		ip->i_d.di_version = dip->di_version;
			
 
				-		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
			
 
				-		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
			
 
				-
			
 
				-		if (dip->di_version == 3) {
			
 
				-			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
			
 
				-			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
			
 
				-		}
			
 
				+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
			
 
				+		return XFS_ERROR(EIO);
			
 
				 
			
 
				-		/*
			
 
				-		 * Make sure to pull in the mode here as well in
			
 
				-		 * case the inode is released without being used.
			
 
				-		 * This ensures that xfs_inactive() will see that
			
 
				-		 * the inode is already free and not try to mess
			
 
				-		 * with the uninitialized part of it.
			
 
				-		 */
			
 
				-		ip->i_d.di_mode = 0;
			
 
				-	}
			
 
				+	lock_mode = xfs_ilock_map_shared(dp);
			
 
				+	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
			
 
				+	xfs_iunlock_map_shared(dp, lock_mode);
			
 
				 
			
 
				-	/*
			
 
				-	 * The inode format changed when we moved the link count and
			
 
				-	 * made it 32 bits long.  If this is an old format inode,
			
 
				-	 * convert it in memory to look like a new one.  If it gets
			
 
				-	 * flushed to disk we will convert back before flushing or
			
 
				-	 * logging it.  We zero out the new projid field and the old link
			
 
				-	 * count field.  We'll handle clearing the pad field (the remains
			
 
				-	 * of the old uuid field) when we actually convert the inode to
			
 
				-	 * the new format. We don't change the version number so that we
			
 
				-	 * can distinguish this from a real new format inode.
			
 
				-	 */
			
 
				-	if (ip->i_d.di_version == 1) {
			
 
				-		ip->i_d.di_nlink = ip->i_d.di_onlink;
			
 
				-		ip->i_d.di_onlink = 0;
			
 
				-		xfs_set_projid(ip, 0);
			
 
				-	}
			
 
				+	if (error)
			
 
				+		goto out;
			
 
				 
			
 
				-	ip->i_delayed_blks = 0;
			
 
				+	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
			
 
				+	if (error)
			
 
				+		goto out_free_name;
			
 
				 
			
 
				-	/*
			
 
				-	 * Mark the buffer containing the inode as something to keep
			
 
				-	 * around for a while.  This helps to keep recently accessed
			
 
				-	 * meta-data in-core longer.
			
 
				-	 */
			
 
				-	xfs_buf_set_ref(bp, XFS_INO_REF);
			
 
				+	return 0;
			
 
				 
			
 
				-	/*
			
 
				-	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
			
 
				-	 * inode, because it was acquired with xfs_trans_read_buf() in
			
 
				-	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
			
 
				-	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
			
 
				-	 * will only release the buffer if it is not dirty within the
			
 
				-	 * transaction.  It will be OK to release the buffer in this case,
			
 
				-	 * because inodes on disk are never destroyed and we will be locking the
			
 
				-	 * new in-core inode before putting it in the cache where other
			
 
				-	 * processes can find it.  Thus we don't have to worry about the inode
			
 
				-	 * being changed just because we released the buffer.
			
 
				-	 */
			
 
				- out_brelse:
			
 
				-	xfs_trans_brelse(tp, bp);
			
 
				+out_free_name:
			
 
				+	if (ci_name)
			
 
				+		kmem_free(ci_name->name);
			
 
				+out:
			
 
				+	*ipp = NULL;
			
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Read in extents from a btree-format inode.
			
 
				- * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
			
 
				- */
			
 
				-int
			
 
				-xfs_iread_extents(
			
 
				-	xfs_trans_t	*tp,
			
 
				-	xfs_inode_t	*ip,
			
 
				-	int		whichfork)
			
 
				-{
			
 
				-	int		error;
			
 
				-	xfs_ifork_t	*ifp;
			
 
				-	xfs_extnum_t	nextents;
			
 
				-
			
 
				-	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
			
 
				-		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
			
 
				-				 ip->i_mount);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-
			
 
				-	/*
			
 
				-	 * We know that the size is valid (it's checked in iformat_btree)
			
 
				-	 */
			
 
				-	ifp->if_bytes = ifp->if_real_bytes = 0;
			
 
				-	ifp->if_flags |= XFS_IFEXTENTS;
			
 
				-	xfs_iext_add(ifp, 0, nextents);
			
 
				-	error = xfs_bmap_read_extents(tp, ip, whichfork);
			
 
				-	if (error) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-		ifp->if_flags &= ~XFS_IFEXTENTS;
			
 
				-		return error;
			
 
				-	}
			
 
				-	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Allocate an inode on disk and return a copy of its in-core version.
			
 
				  * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
			
@@ -1295,8 +690,8 @@ xfs_ialloc(
 
				 	ip->i_d.di_onlink = 0;
			
 
				 	ip->i_d.di_nlink = nlink;
			
 
				 	ASSERT(ip->i_d.di_nlink == nlink);
			
 
				-	ip->i_d.di_uid = current_fsuid();
			
 
				-	ip->i_d.di_gid = current_fsgid();
			
 
				+	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
			
 
				+	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
			
 
				 	xfs_set_projid(ip, prid);
			
 
				 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
			
 
				 
			
@@ -1335,7 +730,7 @@ xfs_ialloc(
 
				 	 */
			
 
				 	if ((irix_sgid_inherit) &&
			
 
				 	    (ip->i_d.di_mode & S_ISGID) &&
			
 
				-	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
			
 
				+	    (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
			
 
				 		ip->i_d.di_mode &= ~S_ISGID;
			
 
				 	}
			
 
				 
			
@@ -1467,31 +862,608 @@ xfs_ialloc(
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Free up the underlying blocks past new_size.  The new size must be smaller
			
 
				- * than the current size.  This routine can be used both for the attribute and
			
 
				- * data fork, and does not modify the inode size, which is left to the caller.
			
 
				+ * Allocates a new inode from disk and return a pointer to the
			
 
				+ * incore copy. This routine will internally commit the current
			
 
				+ * transaction and allocate a new one if the Space Manager needed
			
 
				+ * to do an allocation to replenish the inode free-list.
			
 
				  *
			
 
				- * The transaction passed to this routine must have made a permanent log
			
 
				- * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
			
 
				- * given transaction and start new ones, so make sure everything involved in
			
 
				- * the transaction is tidy before calling here.  Some transaction will be
			
 
				- * returned to the caller to be committed.  The incoming transaction must
			
 
				- * already include the inode, and both inode locks must be held exclusively.
			
 
				- * The inode must also be "held" within the transaction.  On return the inode
			
 
				- * will be "held" within the returned transaction.  This routine does NOT
			
 
				- * require any disk space to be reserved for it within the transaction.
			
 
				+ * This routine is designed to be called from xfs_create and
			
 
				+ * xfs_create_dir.
			
 
				  *
			
 
				- * If we get an error, we must return with the inode locked and linked into the
			
 
				- * current transaction. This keeps things simple for the higher level code,
			
 
				- * because it always knows that the inode is locked and held in the transaction
			
 
				- * that returns to it whether errors occur or not.  We don't mark the inode
			
 
				- * dirty on error so that transactions can be easily aborted if possible.
			
 
				  */
			
 
				 int
			
 
				-xfs_itruncate_extents(
			
 
				-	struct xfs_trans	**tpp,
			
 
				-	struct xfs_inode	*ip,
			
 
				-	int			whichfork,
			
 
				+xfs_dir_ialloc(
			
 
				+	xfs_trans_t	**tpp,		/* input: current transaction;
			
 
				+					   output: may be a new transaction. */
			
 
				+	xfs_inode_t	*dp,		/* directory within whose allocate
			
 
				+					   the inode. */
			
 
				+	umode_t		mode,
			
 
				+	xfs_nlink_t	nlink,
			
 
				+	xfs_dev_t	rdev,
			
 
				+	prid_t		prid,		/* project id */
			
 
				+	int		okalloc,	/* ok to allocate new space */
			
 
				+	xfs_inode_t	**ipp,		/* pointer to inode; it will be
			
 
				+					   locked. */
			
 
				+	int		*committed)
			
 
				+
			
 
				+{
			
 
				+	xfs_trans_t	*tp;
			
 
				+	xfs_trans_t	*ntp;
			
 
				+	xfs_inode_t	*ip;
			
 
				+	xfs_buf_t	*ialloc_context = NULL;
			
 
				+	int		code;
			
 
				+	void		*dqinfo;
			
 
				+	uint		tflags;
			
 
				+
			
 
				+	tp = *tpp;
			
 
				+	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
			
 
				+
			
 
				+	/*
			
 
				+	 * xfs_ialloc will return a pointer to an incore inode if
			
 
				+	 * the Space Manager has an available inode on the free
			
 
				+	 * list. Otherwise, it will do an allocation and replenish
			
 
				+	 * the freelist.  Since we can only do one allocation per
			
 
				+	 * transaction without deadlocks, we will need to commit the
			
 
				+	 * current transaction and start a new one.  We will then
			
 
				+	 * need to call xfs_ialloc again to get the inode.
			
 
				+	 *
			
 
				+	 * If xfs_ialloc did an allocation to replenish the freelist,
			
 
				+	 * it returns the bp containing the head of the freelist as
			
 
				+	 * ialloc_context. We will hold a lock on it across the
			
 
				+	 * transaction commit so that no other process can steal
			
 
				+	 * the inode(s) that we've just allocated.
			
 
				+	 */
			
 
				+	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
			
 
				+			  &ialloc_context, &ip);
			
 
				+
			
 
				+	/*
			
 
				+	 * Return an error if we were unable to allocate a new inode.
			
 
				+	 * This should only happen if we run out of space on disk or
			
 
				+	 * encounter a disk error.
			
 
				+	 */
			
 
				+	if (code) {
			
 
				+		*ipp = NULL;
			
 
				+		return code;
			
 
				+	}
			
 
				+	if (!ialloc_context && !ip) {
			
 
				+		*ipp = NULL;
			
 
				+		return XFS_ERROR(ENOSPC);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If the AGI buffer is non-NULL, then we were unable to get an
			
 
				+	 * inode in one operation.  We need to commit the current
			
 
				+	 * transaction and call xfs_ialloc() again.  It is guaranteed
			
 
				+	 * to succeed the second time.
			
 
				+	 */
			
 
				+	if (ialloc_context) {
			
 
				+		struct xfs_trans_res tres;
			
 
				+
			
 
				+		/*
			
 
				+		 * Normally, xfs_trans_commit releases all the locks.
			
 
				+		 * We call bhold to hang on to the ialloc_context across
			
 
				+		 * the commit.  Holding this buffer prevents any other
			
 
				+		 * processes from doing any allocations in this
			
 
				+		 * allocation group.
			
 
				+		 */
			
 
				+		xfs_trans_bhold(tp, ialloc_context);
			
 
				+		/*
			
 
				+		 * Save the log reservation so we can use
			
 
				+		 * them in the next transaction.
			
 
				+		 */
			
 
				+		tres.tr_logres = xfs_trans_get_log_res(tp);
			
 
				+		tres.tr_logcount = xfs_trans_get_log_count(tp);
			
 
				+
			
 
				+		/*
			
 
				+		 * We want the quota changes to be associated with the next
			
 
				+		 * transaction, NOT this one. So, detach the dqinfo from this
			
 
				+		 * and attach it to the next transaction.
			
 
				+		 */
			
 
				+		dqinfo = NULL;
			
 
				+		tflags = 0;
			
 
				+		if (tp->t_dqinfo) {
			
 
				+			dqinfo = (void *)tp->t_dqinfo;
			
 
				+			tp->t_dqinfo = NULL;
			
 
				+			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
			
 
				+			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
			
 
				+		}
			
 
				+
			
 
				+		ntp = xfs_trans_dup(tp);
			
 
				+		code = xfs_trans_commit(tp, 0);
			
 
				+		tp = ntp;
			
 
				+		if (committed != NULL) {
			
 
				+			*committed = 1;
			
 
				+		}
			
 
				+		/*
			
 
				+		 * If we get an error during the commit processing,
			
 
				+		 * release the buffer that is still held and return
			
 
				+		 * to the caller.
			
 
				+		 */
			
 
				+		if (code) {
			
 
				+			xfs_buf_relse(ialloc_context);
			
 
				+			if (dqinfo) {
			
 
				+				tp->t_dqinfo = dqinfo;
			
 
				+				xfs_trans_free_dqinfo(tp);
			
 
				+			}
			
 
				+			*tpp = ntp;
			
 
				+			*ipp = NULL;
			
 
				+			return code;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * transaction commit worked ok so we can drop the extra ticket
			
 
				+		 * reference that we gained in xfs_trans_dup()
			
 
				+		 */
			
 
				+		xfs_log_ticket_put(tp->t_ticket);
			
 
				+		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
			
 
				+		code = xfs_trans_reserve(tp, &tres, 0, 0);
			
 
				+
			
 
				+		/*
			
 
				+		 * Re-attach the quota info that we detached from prev trx.
			
 
				+		 */
			
 
				+		if (dqinfo) {
			
 
				+			tp->t_dqinfo = dqinfo;
			
 
				+			tp->t_flags |= tflags;
			
 
				+		}
			
 
				+
			
 
				+		if (code) {
			
 
				+			xfs_buf_relse(ialloc_context);
			
 
				+			*tpp = ntp;
			
 
				+			*ipp = NULL;
			
 
				+			return code;
			
 
				+		}
			
 
				+		xfs_trans_bjoin(tp, ialloc_context);
			
 
				+
			
 
				+		/*
			
 
				+		 * Call ialloc again. Since we've locked out all
			
 
				+		 * other allocations in this allocation group,
			
 
				+		 * this call should always succeed.
			
 
				+		 */
			
 
				+		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
			
 
				+				  okalloc, &ialloc_context, &ip);
			
 
				+
			
 
				+		/*
			
 
				+		 * If we get an error at this point, return to the caller
			
 
				+		 * so that the current transaction can be aborted.
			
 
				+		 */
			
 
				+		if (code) {
			
 
				+			*tpp = tp;
			
 
				+			*ipp = NULL;
			
 
				+			return code;
			
 
				+		}
			
 
				+		ASSERT(!ialloc_context && ip);
			
 
				+
			
 
				+	} else {
			
 
				+		if (committed != NULL)
			
 
				+			*committed = 0;
			
 
				+	}
			
 
				+
			
 
				+	*ipp = ip;
			
 
				+	*tpp = tp;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Decrement the link count on an inode & log the change.
			
 
				+ * If this causes the link count to go to zero, initiate the
			
 
				+ * logging activity required to truncate a file.
			
 
				+ */
			
 
				+int				/* error */
			
 
				+xfs_droplink(
			
 
				+	xfs_trans_t *tp,
			
 
				+	xfs_inode_t *ip)
			
 
				+{
			
 
				+	int	error;
			
 
				+
			
 
				+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
			
 
				+
			
 
				+	ASSERT (ip->i_d.di_nlink > 0);
			
 
				+	ip->i_d.di_nlink--;
			
 
				+	drop_nlink(VFS_I(ip));
			
 
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+
			
 
				+	error = 0;
			
 
				+	if (ip->i_d.di_nlink == 0) {
			
 
				+		/*
			
 
				+		 * We're dropping the last link to this file.
			
 
				+		 * Move the on-disk inode to the AGI unlinked list.
			
 
				+		 * From xfs_inactive() we will pull the inode from
			
 
				+		 * the list and free it.
			
 
				+		 */
			
 
				+		error = xfs_iunlink(tp, ip);
			
 
				+	}
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This gets called when the inode's version needs to be changed from 1 to 2.
			
 
				+ * Currently this happens when the nlink field overflows the old 16-bit value
			
 
				+ * or when chproj is called to change the project for the first time.
			
 
				+ * As a side effect the superblock version will also get rev'd
			
 
				+ * to contain the NLINK bit.
			
 
				+ */
			
 
				+void
			
 
				+xfs_bump_ino_vers2(
			
 
				+	xfs_trans_t	*tp,
			
 
				+	xfs_inode_t	*ip)
			
 
				+{
			
 
				+	xfs_mount_t	*mp;
			
 
				+
			
 
				+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
			
 
				+	ASSERT(ip->i_d.di_version == 1);
			
 
				+
			
 
				+	ip->i_d.di_version = 2;
			
 
				+	ip->i_d.di_onlink = 0;
			
 
				+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
			
 
				+	mp = tp->t_mountp;
			
 
				+	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			
 
				+		spin_lock(&mp->m_sb_lock);
			
 
				+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			
 
				+			xfs_sb_version_addnlink(&mp->m_sb);
			
 
				+			spin_unlock(&mp->m_sb_lock);
			
 
				+			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
			
 
				+		} else {
			
 
				+			spin_unlock(&mp->m_sb_lock);
			
 
				+		}
			
 
				+	}
			
 
				+	/* Caller must log the inode */
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Increment the link count on an inode & log the change.
			
 
				+ */
			
 
				+int
			
 
				+xfs_bumplink(
			
 
				+	xfs_trans_t *tp,
			
 
				+	xfs_inode_t *ip)
			
 
				+{
			
 
				+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
			
 
				+
			
 
				+	ASSERT(ip->i_d.di_nlink > 0);
			
 
				+	ip->i_d.di_nlink++;
			
 
				+	inc_nlink(VFS_I(ip));
			
 
				+	if ((ip->i_d.di_version == 1) &&
			
 
				+	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
			
 
				+		/*
			
 
				+		 * The inode has increased its number of links beyond
			
 
				+		 * what can fit in an old format inode.  It now needs
			
 
				+		 * to be converted to a version 2 inode with a 32 bit
			
 
				+		 * link count.  If this is the first inode in the file
			
 
				+		 * system to do this, then we need to bump the superblock
			
 
				+		 * version number as well.
			
 
				+		 */
			
 
				+		xfs_bump_ino_vers2(tp, ip);
			
 
				+	}
			
 
				+
			
 
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_create(
			
 
				+	xfs_inode_t		*dp,
			
 
				+	struct xfs_name		*name,
			
 
				+	umode_t			mode,
			
 
				+	xfs_dev_t		rdev,
			
 
				+	xfs_inode_t		**ipp)
			
 
				+{
			
 
				+	int			is_dir = S_ISDIR(mode);
			
 
				+	struct xfs_mount	*mp = dp->i_mount;
			
 
				+	struct xfs_inode	*ip = NULL;
			
 
				+	struct xfs_trans	*tp = NULL;
			
 
				+	int			error;
			
 
				+	xfs_bmap_free_t		free_list;
			
 
				+	xfs_fsblock_t		first_block;
			
 
				+	bool                    unlock_dp_on_error = false;
			
 
				+	uint			cancel_flags;
			
 
				+	int			committed;
			
 
				+	prid_t			prid;
			
 
				+	struct xfs_dquot	*udqp = NULL;
			
 
				+	struct xfs_dquot	*gdqp = NULL;
			
 
				+	struct xfs_dquot	*pdqp = NULL;
			
 
				+	struct xfs_trans_res	tres;
			
 
				+	uint			resblks;
			
 
				+
			
 
				+	trace_xfs_create(dp, name);
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				+		return XFS_ERROR(EIO);
			
 
				+
			
 
				+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
			
 
				+		prid = xfs_get_projid(dp);
			
 
				+	else
			
 
				+		prid = XFS_PROJID_DEFAULT;
			
 
				+
			
 
				+	/*
			
 
				+	 * Make sure that we have allocated dquot(s) on disk.
			
 
				+	 */
			
 
				+	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
			
 
				+					xfs_kgid_to_gid(current_fsgid()), prid,
			
 
				+					XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
			
 
				+					&udqp, &gdqp, &pdqp);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	if (is_dir) {
			
 
				+		rdev = 0;
			
 
				+		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
			
 
				+		tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres;
			
 
				+		tres.tr_logcount = XFS_MKDIR_LOG_COUNT;
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
			
 
				+	} else {
			
 
				+		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
			
 
				+		tres.tr_logres = M_RES(mp)->tr_create.tr_logres;
			
 
				+		tres.tr_logcount = XFS_CREATE_LOG_COUNT;
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
			
 
				+	}
			
 
				+
			
 
				+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				+
			
 
				+	/*
			
 
				+	 * Initially assume that the file does not exist and
			
 
				+	 * reserve the resources for that case.  If that is not
			
 
				+	 * the case we'll drop the one we have and get a more
			
 
				+	 * appropriate transaction later.
			
 
				+	 */
			
 
				+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
			
 
				+	error = xfs_trans_reserve(tp, &tres, resblks, 0);
			
 
				+	if (error == ENOSPC) {
			
 
				+		/* flush outstanding delalloc blocks and retry */
			
 
				+		xfs_flush_inodes(mp);
			
 
				+		error = xfs_trans_reserve(tp, &tres, resblks, 0);
			
 
				+	}
			
 
				+	if (error == ENOSPC) {
			
 
				+		/* No space at all so try a "no-allocation" reservation */
			
 
				+		resblks = 0;
			
 
				+		error = xfs_trans_reserve(tp, &tres, 0, 0);
			
 
				+	}
			
 
				+	if (error) {
			
 
				+		cancel_flags = 0;
			
 
				+		goto out_trans_cancel;
			
 
				+	}
			
 
				+
			
 
				+	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
			
 
				+	unlock_dp_on_error = true;
			
 
				+
			
 
				+	xfs_bmap_init(&free_list, &first_block);
			
 
				+
			
 
				+	/*
			
 
				+	 * Reserve disk quota and the inode.
			
 
				+	 */
			
 
				+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
			
 
				+						pdqp, resblks, 1, 0);
			
 
				+	if (error)
			
 
				+		goto out_trans_cancel;
			
 
				+
			
 
				+	error = xfs_dir_canenter(tp, dp, name, resblks);
			
 
				+	if (error)
			
 
				+		goto out_trans_cancel;
			
 
				+
			
 
				+	/*
			
 
				+	 * A newly created regular or special file just has one directory
			
 
				+	 * entry pointing to them, but a directory also the "." entry
			
 
				+	 * pointing to itself.
			
 
				+	 */
			
 
				+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
			
 
				+			       prid, resblks > 0, &ip, &committed);
			
 
				+	if (error) {
			
 
				+		if (error == ENOSPC)
			
 
				+			goto out_trans_cancel;
			
 
				+		goto out_trans_abort;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Now we join the directory inode to the transaction.  We do not do it
			
 
				+	 * earlier because xfs_dir_ialloc might commit the previous transaction
			
 
				+	 * (and release all the locks).  An error from here on will result in
			
 
				+	 * the transaction cancel unlocking dp so don't do it explicitly in the
			
 
				+	 * error path.
			
 
				+	 */
			
 
				+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
			
 
				+	unlock_dp_on_error = false;
			
 
				+
			
 
				+	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
			
 
				+					&first_block, &free_list, resblks ?
			
 
				+					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
			
 
				+	if (error) {
			
 
				+		ASSERT(error != ENOSPC);
			
 
				+		goto out_trans_abort;
			
 
				+	}
			
 
				+	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
			
 
				+
			
 
				+	if (is_dir) {
			
 
				+		error = xfs_dir_init(tp, ip, dp);
			
 
				+		if (error)
			
 
				+			goto out_bmap_cancel;
			
 
				+
			
 
				+		error = xfs_bumplink(tp, dp);
			
 
				+		if (error)
			
 
				+			goto out_bmap_cancel;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is a synchronous mount, make sure that the
			
 
				+	 * create transaction goes to disk before returning to
			
 
				+	 * the user.
			
 
				+	 */
			
 
				+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+
			
 
				+	/*
			
 
				+	 * Attach the dquot(s) to the inodes and modify them incore.
			
 
				+	 * These ids of the inode couldn't have changed since the new
			
 
				+	 * inode has been locked ever since it was created.
			
 
				+	 */
			
 
				+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
			
 
				+
			
 
				+	error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				+	if (error)
			
 
				+		goto out_bmap_cancel;
			
 
				+
			
 
				+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+	if (error)
			
 
				+		goto out_release_inode;
			
 
				+
			
 
				+	xfs_qm_dqrele(udqp);
			
 
				+	xfs_qm_dqrele(gdqp);
			
 
				+	xfs_qm_dqrele(pdqp);
			
 
				+
			
 
				+	*ipp = ip;
			
 
				+	return 0;
			
 
				+
			
 
				+ out_bmap_cancel:
			
 
				+	xfs_bmap_cancel(&free_list);
			
 
				+ out_trans_abort:
			
 
				+	cancel_flags |= XFS_TRANS_ABORT;
			
 
				+ out_trans_cancel:
			
 
				+	xfs_trans_cancel(tp, cancel_flags);
			
 
				+ out_release_inode:
			
 
				+	/*
			
 
				+	 * Wait until after the current transaction is aborted to
			
 
				+	 * release the inode.  This prevents recursive transactions
			
 
				+	 * and deadlocks from xfs_inactive.
			
 
				+	 */
			
 
				+	if (ip)
			
 
				+		IRELE(ip);
			
 
				+
			
 
				+	xfs_qm_dqrele(udqp);
			
 
				+	xfs_qm_dqrele(gdqp);
			
 
				+	xfs_qm_dqrele(pdqp);
			
 
				+
			
 
				+	if (unlock_dp_on_error)
			
 
				+		xfs_iunlock(dp, XFS_ILOCK_EXCL);
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_link(
			
 
				+	xfs_inode_t		*tdp,
			
 
				+	xfs_inode_t		*sip,
			
 
				+	struct xfs_name		*target_name)
			
 
				+{
			
 
				+	xfs_mount_t		*mp = tdp->i_mount;
			
 
				+	xfs_trans_t		*tp;
			
 
				+	int			error;
			
 
				+	xfs_bmap_free_t         free_list;
			
 
				+	xfs_fsblock_t           first_block;
			
 
				+	int			cancel_flags;
			
 
				+	int			committed;
			
 
				+	int			resblks;
			
 
				+
			
 
				+	trace_xfs_link(tdp, target_name);
			
 
				+
			
 
				+	ASSERT(!S_ISDIR(sip->i_d.di_mode));
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				+		return XFS_ERROR(EIO);
			
 
				+
			
 
				+	error = xfs_qm_dqattach(sip, 0);
			
 
				+	if (error)
			
 
				+		goto std_return;
			
 
				+
			
 
				+	error = xfs_qm_dqattach(tdp, 0);
			
 
				+	if (error)
			
 
				+		goto std_return;
			
 
				+
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
			
 
				+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				+	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
			
 
				+	if (error == ENOSPC) {
			
 
				+		resblks = 0;
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
			
 
				+	}
			
 
				+	if (error) {
			
 
				+		cancel_flags = 0;
			
 
				+		goto error_return;
			
 
				+	}
			
 
				+
			
 
				+	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are using project inheritance, we only allow hard link
			
 
				+	 * creation in our tree when the project IDs are the same; else
			
 
				+	 * the tree quota mechanism could be circumvented.
			
 
				+	 */
			
 
				+	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
			
 
				+		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
			
 
				+		error = XFS_ERROR(EXDEV);
			
 
				+		goto error_return;
			
 
				+	}
			
 
				+
			
 
				+	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
			
 
				+	if (error)
			
 
				+		goto error_return;
			
 
				+
			
 
				+	xfs_bmap_init(&free_list, &first_block);
			
 
				+
			
 
				+	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
			
 
				+					&first_block, &free_list, resblks);
			
 
				+	if (error)
			
 
				+		goto abort_return;
			
 
				+	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
			
 
				+
			
 
				+	error = xfs_bumplink(tp, sip);
			
 
				+	if (error)
			
 
				+		goto abort_return;
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is a synchronous mount, make sure that the
			
 
				+	 * link transaction goes to disk before returning to
			
 
				+	 * the user.
			
 
				+	 */
			
 
				+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+	}
			
 
				+
			
 
				+	error = xfs_bmap_finish (&tp, &free_list, &committed);
			
 
				+	if (error) {
			
 
				+		xfs_bmap_cancel(&free_list);
			
 
				+		goto abort_return;
			
 
				+	}
			
 
				+
			
 
				+	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+
			
 
				+ abort_return:
			
 
				+	cancel_flags |= XFS_TRANS_ABORT;
			
 
				+ error_return:
			
 
				+	xfs_trans_cancel(tp, cancel_flags);
			
 
				+ std_return:
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Free up the underlying blocks past new_size.  The new size must be smaller
			
 
				+ * than the current size.  This routine can be used both for the attribute and
			
 
				+ * data fork, and does not modify the inode size, which is left to the caller.
			
 
				+ *
			
 
				+ * The transaction passed to this routine must have made a permanent log
			
 
				+ * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
			
 
				+ * given transaction and start new ones, so make sure everything involved in
			
 
				+ * the transaction is tidy before calling here.  Some transaction will be
			
 
				+ * returned to the caller to be committed.  The incoming transaction must
			
 
				+ * already include the inode, and both inode locks must be held exclusively.
			
 
				+ * The inode must also be "held" within the transaction.  On return the inode
			
 
				+ * will be "held" within the returned transaction.  This routine does NOT
			
 
				+ * require any disk space to be reserved for it within the transaction.
			
 
				+ *
			
 
				+ * If we get an error, we must return with the inode locked and linked into the
			
 
				+ * current transaction. This keeps things simple for the higher level code,
			
 
				+ * because it always knows that the inode is locked and held in the transaction
			
 
				+ * that returns to it whether errors occur or not.  We don't mark the inode
			
 
				+ * dirty on error so that transactions can be easily aborted if possible.
			
 
				+ */
			
 
				+int
			
 
				+xfs_itruncate_extents(
			
 
				+	struct xfs_trans	**tpp,
			
 
				+	struct xfs_inode	*ip,
			
 
				+	int			whichfork,
			
 
				 	xfs_fsize_t		new_size)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = ip->i_mount;
			
@@ -1572,37 +1544,299 @@ xfs_itruncate_extents(
 
				 			goto out;
			
 
				 
			
 
				 		/*
			
 
				-		 * Transaction commit worked ok so we can drop the extra ticket
			
 
				-		 * reference that we gained in xfs_trans_dup()
			
 
				+		 * Transaction commit worked ok so we can drop the extra ticket
			
 
				+		 * reference that we gained in xfs_trans_dup()
			
 
				+		 */
			
 
				+		xfs_log_ticket_put(tp->t_ticket);
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
			
 
				+		if (error)
			
 
				+			goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Always re-log the inode so that our permanent transaction can keep
			
 
				+	 * on rolling it forward in the log.
			
 
				+	 */
			
 
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+
			
 
				+	trace_xfs_itruncate_extents_end(ip, new_size);
			
 
				+
			
 
				+out:
			
 
				+	*tpp = tp;
			
 
				+	return error;
			
 
				+out_bmap_cancel:
			
 
				+	/*
			
 
				+	 * If the bunmapi call encounters an error, return to the caller where
			
 
				+	 * the transaction can be properly aborted.  We just need to make sure
			
 
				+	 * we're not holding any resources that we were not when we came in.
			
 
				+	 */
			
 
				+	xfs_bmap_cancel(&free_list);
			
 
				+	goto out;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_release(
			
 
				+	xfs_inode_t	*ip)
			
 
				+{
			
 
				+	xfs_mount_t	*mp = ip->i_mount;
			
 
				+	int		error;
			
 
				+
			
 
				+	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
			
 
				+		return 0;
			
 
				+
			
 
				+	/* If this is a read-only mount, don't do this (would generate I/O) */
			
 
				+	if (mp->m_flags & XFS_MOUNT_RDONLY)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (!XFS_FORCED_SHUTDOWN(mp)) {
			
 
				+		int truncated;
			
 
				+
			
 
				+		/*
			
 
				+		 * If we are using filestreams, and we have an unlinked
			
 
				+		 * file that we are processing the last close on, then nothing
			
 
				+		 * will be able to reopen and write to this file. Purge this
			
 
				+		 * inode from the filestreams cache so that it doesn't delay
			
 
				+		 * teardown of the inode.
			
 
				+		 */
			
 
				+		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			
 
				+			xfs_filestream_deassociate(ip);
			
 
				+
			
 
				+		/*
			
 
				+		 * If we previously truncated this file and removed old data
			
 
				+		 * in the process, we want to initiate "early" writeout on
			
 
				+		 * the last close.  This is an attempt to combat the notorious
			
 
				+		 * NULL files problem which is particularly noticeable from a
			
 
				+		 * truncate down, buffered (re-)write (delalloc), followed by
			
 
				+		 * a crash.  What we are effectively doing here is
			
 
				+		 * significantly reducing the time window where we'd otherwise
			
 
				+		 * be exposed to that problem.
			
 
				+		 */
			
 
				+		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
			
 
				+		if (truncated) {
			
 
				+			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
			
 
				+			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
			
 
				+				error = -filemap_flush(VFS_I(ip)->i_mapping);
			
 
				+				if (error)
			
 
				+					return error;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (ip->i_d.di_nlink == 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (xfs_can_free_eofblocks(ip, false)) {
			
 
				+
			
 
				+		/*
			
 
				+		 * If we can't get the iolock just skip truncating the blocks
			
 
				+		 * past EOF because we could deadlock with the mmap_sem
			
 
				+		 * otherwise.  We'll get another chance to drop them once the
			
 
				+		 * last reference to the inode is dropped, so we'll never leak
			
 
				+		 * blocks permanently.
			
 
				+		 *
			
 
				+		 * Further, check if the inode is being opened, written and
			
 
				+		 * closed frequently and we have delayed allocation blocks
			
 
				+		 * outstanding (e.g. streaming writes from the NFS server),
			
 
				+		 * truncating the blocks past EOF will cause fragmentation to
			
 
				+		 * occur.
			
 
				+		 *
			
 
				+		 * In this case don't do the truncation, either, but we have to
			
 
				+		 * be careful how we detect this case. Blocks beyond EOF show
			
 
				+		 * up as i_delayed_blks even when the inode is clean, so we
			
 
				+		 * need to truncate them away first before checking for a dirty
			
 
				+		 * release. Hence on the first dirty close we will still remove
			
 
				+		 * the speculative allocation, but after that we will leave it
			
 
				+		 * in place.
			
 
				+		 */
			
 
				+		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
			
 
				+			return 0;
			
 
				+
			
 
				+		error = xfs_free_eofblocks(mp, ip, true);
			
 
				+		if (error && error != EAGAIN)
			
 
				+			return error;
			
 
				+
			
 
				+		/* delalloc blocks after truncation means it really is dirty */
			
 
				+		if (ip->i_delayed_blks)
			
 
				+			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * xfs_inactive
			
 
				+ *
			
 
				+ * This is called when the vnode reference count for the vnode
			
 
				+ * goes to zero.  If the file has been unlinked, then it must
			
 
				+ * now be truncated.  Also, we clear all of the read-ahead state
			
 
				+ * kept for the inode here since the file is now closed.
			
 
				+ */
			
 
				+int
			
 
				+xfs_inactive(
			
 
				+	xfs_inode_t	*ip)
			
 
				+{
			
 
				+	xfs_bmap_free_t		free_list;
			
 
				+	xfs_fsblock_t		first_block;
			
 
				+	int			committed;
			
 
				+	struct xfs_trans	*tp;
			
 
				+	struct xfs_mount	*mp;
			
 
				+	struct xfs_trans_res	*resp;
			
 
				+	int			error;
			
 
				+	int			truncate = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * If the inode is already free, then there can be nothing
			
 
				+	 * to clean up here.
			
 
				+	 */
			
 
				+	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
			
 
				+		ASSERT(ip->i_df.if_real_bytes == 0);
			
 
				+		ASSERT(ip->i_df.if_broot_bytes == 0);
			
 
				+		return VN_INACTIVE_CACHE;
			
 
				+	}
			
 
				+
			
 
				+	mp = ip->i_mount;
			
 
				+
			
 
				+	error = 0;
			
 
				+
			
 
				+	/* If this is a read-only mount, don't do this (would generate I/O) */
			
 
				+	if (mp->m_flags & XFS_MOUNT_RDONLY)
			
 
				+		goto out;
			
 
				+
			
 
				+	if (ip->i_d.di_nlink != 0) {
			
 
				+		/*
			
 
				+		 * force is true because we are evicting an inode from the
			
 
				+		 * cache. Post-eof blocks must be freed, lest we end up with
			
 
				+		 * broken free space accounting.
			
 
				+		 */
			
 
				+		if (xfs_can_free_eofblocks(ip, true)) {
			
 
				+			error = xfs_free_eofblocks(mp, ip, false);
			
 
				+			if (error)
			
 
				+				return VN_INACTIVE_CACHE;
			
 
				+		}
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (S_ISREG(ip->i_d.di_mode) &&
			
 
				+	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
			
 
				+	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
			
 
				+		truncate = 1;
			
 
				+
			
 
				+	error = xfs_qm_dqattach(ip, 0);
			
 
				+	if (error)
			
 
				+		return VN_INACTIVE_CACHE;
			
 
				+
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
			
 
				+	resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ?
			
 
				+		&M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree;
			
 
				+
			
 
				+	error = xfs_trans_reserve(tp, resp, 0, 0);
			
 
				+	if (error) {
			
 
				+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				+		xfs_trans_cancel(tp, 0);
			
 
				+		return VN_INACTIVE_CACHE;
			
 
				+	}
			
 
				+
			
 
				+	xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, ip, 0);
			
 
				+
			
 
				+	if (S_ISLNK(ip->i_d.di_mode)) {
			
 
				+		error = xfs_inactive_symlink(ip, &tp);
			
 
				+		if (error)
			
 
				+			goto out_cancel;
			
 
				+	} else if (truncate) {
			
 
				+		ip->i_d.di_size = 0;
			
 
				+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+
			
 
				+		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
			
 
				+		if (error)
			
 
				+			goto out_cancel;
			
 
				+
			
 
				+		ASSERT(ip->i_d.di_nextents == 0);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If there are attributes associated with the file then blow them away
			
 
				+	 * now.  The code calls a routine that recursively deconstructs the
			
 
				+	 * attribute fork.  We need to just commit the current transaction
			
 
				+	 * because we can't use it for xfs_attr_inactive().
			
 
				+	 */
			
 
				+	if (ip->i_d.di_anextents > 0) {
			
 
				+		ASSERT(ip->i_d.di_forkoff != 0);
			
 
				+
			
 
				+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+		if (error)
			
 
				+			goto out_unlock;
			
 
				+
			
 
				+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+		error = xfs_attr_inactive(ip);
			
 
				+		if (error)
			
 
				+			goto out;
			
 
				+
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
			
 
				+		if (error) {
			
 
				+			xfs_trans_cancel(tp, 0);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				+		xfs_trans_ijoin(tp, ip, 0);
			
 
				+	}
			
 
				+
			
 
				+	if (ip->i_afp)
			
 
				+		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
			
 
				+
			
 
				+	ASSERT(ip->i_d.di_anextents == 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * Free the inode.
			
 
				+	 */
			
 
				+	xfs_bmap_init(&free_list, &first_block);
			
 
				+	error = xfs_ifree(tp, ip, &free_list);
			
 
				+	if (error) {
			
 
				+		/*
			
 
				+		 * If we fail to free the inode, shut down.  The cancel
			
 
				+		 * might do that, we need to make sure.  Otherwise the
			
 
				+		 * inode might be lost for a long time or forever.
			
 
				+		 */
			
 
				+		if (!XFS_FORCED_SHUTDOWN(mp)) {
			
 
				+			xfs_notice(mp, "%s: xfs_ifree returned error %d",
			
 
				+				__func__, error);
			
 
				+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
			
 
				+		}
			
 
				+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Credit the quota account(s). The inode is gone.
			
 
				+		 */
			
 
				+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
			
 
				+
			
 
				+		/*
			
 
				+		 * Just ignore errors at this point.  There is nothing we can
			
 
				+		 * do except to try to keep going. Make sure it's not a silent
			
 
				+		 * error.
			
 
				 		 */
			
 
				-		xfs_log_ticket_put(tp->t_ticket);
			
 
				-		error = xfs_trans_reserve(tp, 0,
			
 
				-					XFS_ITRUNCATE_LOG_RES(mp), 0,
			
 
				-					XFS_TRANS_PERM_LOG_RES,
			
 
				-					XFS_ITRUNCATE_LOG_COUNT);
			
 
				+		error = xfs_bmap_finish(&tp,  &free_list, &committed);
			
 
				 		if (error)
			
 
				-			goto out;
			
 
				+			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
			
 
				+				__func__, error);
			
 
				+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+		if (error)
			
 
				+			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
			
 
				+				__func__, error);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * Always re-log the inode so that our permanent transaction can keep
			
 
				-	 * on rolling it forward in the log.
			
 
				+	 * Release the dquots held by inode, if any.
			
 
				 	 */
			
 
				-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				-
			
 
				-	trace_xfs_itruncate_extents_end(ip, new_size);
			
 
				-
			
 
				+	xfs_qm_dqdetach(ip);
			
 
				+out_unlock:
			
 
				+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
 
				 out:
			
 
				-	*tpp = tp;
			
 
				-	return error;
			
 
				-out_bmap_cancel:
			
 
				-	/*
			
 
				-	 * If the bunmapi call encounters an error, return to the caller where
			
 
				-	 * the transaction can be properly aborted.  We just need to make sure
			
 
				-	 * we're not holding any resources that we were not when we came in.
			
 
				-	 */
			
 
				-	xfs_bmap_cancel(&free_list);
			
 
				-	goto out;
			
 
				+	return VN_INACTIVE_CACHE;
			
 
				+out_cancel:
			
 
				+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			
 
				+	goto out_unlock;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1861,7 +2095,7 @@ xfs_iunlink_remove(
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * A big issue when freeing the inode cluster is is that we _cannot_ skip any
			
 
				+ * A big issue when freeing the inode cluster is that we _cannot_ skip any
			
 
				  * inodes that are in memory - they all must be marked stale and attached to
			
 
				  * the cluster buffer.
			
 
				  */
			
@@ -2093,272 +2327,6 @@ xfs_ifree(
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Reallocate the space for if_broot based on the number of records
			
 
				- * being added or deleted as indicated in rec_diff.  Move the records
			
 
				- * and pointers in if_broot to fit the new size.  When shrinking this
			
 
				- * will eliminate holes between the records and pointers created by
			
 
				- * the caller.  When growing this will create holes to be filled in
			
 
				- * by the caller.
			
 
				- *
			
 
				- * The caller must not request to add more records than would fit in
			
 
				- * the on-disk inode root.  If the if_broot is currently NULL, then
			
 
				- * if we adding records one will be allocated.  The caller must also
			
 
				- * not request that the number of records go below zero, although
			
 
				- * it can go to zero.
			
 
				- *
			
 
				- * ip -- the inode whose if_broot area is changing
			
 
				- * ext_diff -- the change in the number of records, positive or negative,
			
 
				- *	 requested for the if_broot array.
			
 
				- */
			
 
				-void
			
 
				-xfs_iroot_realloc(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	int			rec_diff,
			
 
				-	int			whichfork)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = ip->i_mount;
			
 
				-	int			cur_max;
			
 
				-	xfs_ifork_t		*ifp;
			
 
				-	struct xfs_btree_block	*new_broot;
			
 
				-	int			new_max;
			
 
				-	size_t			new_size;
			
 
				-	char			*np;
			
 
				-	char			*op;
			
 
				-
			
 
				-	/*
			
 
				-	 * Handle the degenerate case quietly.
			
 
				-	 */
			
 
				-	if (rec_diff == 0) {
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	if (rec_diff > 0) {
			
 
				-		/*
			
 
				-		 * If there wasn't any memory allocated before, just
			
 
				-		 * allocate it now and get out.
			
 
				-		 */
			
 
				-		if (ifp->if_broot_bytes == 0) {
			
 
				-			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
			
 
				-			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
			
 
				-			ifp->if_broot_bytes = (int)new_size;
			
 
				-			return;
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * If there is already an existing if_broot, then we need
			
 
				-		 * to realloc() it and shift the pointers to their new
			
 
				-		 * location.  The records don't change location because
			
 
				-		 * they are kept butted up against the btree block header.
			
 
				-		 */
			
 
				-		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
			
 
				-		new_max = cur_max + rec_diff;
			
 
				-		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
			
 
				-		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
			
 
				-				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
			
 
				-				KM_SLEEP | KM_NOFS);
			
 
				-		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
			
 
				-						     ifp->if_broot_bytes);
			
 
				-		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
			
 
				-						     (int)new_size);
			
 
				-		ifp->if_broot_bytes = (int)new_size;
			
 
				-		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			
 
				-			XFS_IFORK_SIZE(ip, whichfork));
			
 
				-		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * rec_diff is less than 0.  In this case, we are shrinking the
			
 
				-	 * if_broot buffer.  It must already exist.  If we go to zero
			
 
				-	 * records, just get rid of the root and clear the status bit.
			
 
				-	 */
			
 
				-	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
			
 
				-	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
			
 
				-	new_max = cur_max + rec_diff;
			
 
				-	ASSERT(new_max >= 0);
			
 
				-	if (new_max > 0)
			
 
				-		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
			
 
				-	else
			
 
				-		new_size = 0;
			
 
				-	if (new_size > 0) {
			
 
				-		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
			
 
				-		/*
			
 
				-		 * First copy over the btree block header.
			
 
				-		 */
			
 
				-		memcpy(new_broot, ifp->if_broot,
			
 
				-			XFS_BMBT_BLOCK_LEN(ip->i_mount));
			
 
				-	} else {
			
 
				-		new_broot = NULL;
			
 
				-		ifp->if_flags &= ~XFS_IFBROOT;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Only copy the records and pointers if there are any.
			
 
				-	 */
			
 
				-	if (new_max > 0) {
			
 
				-		/*
			
 
				-		 * First copy the records.
			
 
				-		 */
			
 
				-		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
			
 
				-		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
			
 
				-		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
			
 
				-
			
 
				-		/*
			
 
				-		 * Then copy the pointers.
			
 
				-		 */
			
 
				-		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
			
 
				-						     ifp->if_broot_bytes);
			
 
				-		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
			
 
				-						     (int)new_size);
			
 
				-		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
			
 
				-	}
			
 
				-	kmem_free(ifp->if_broot);
			
 
				-	ifp->if_broot = new_broot;
			
 
				-	ifp->if_broot_bytes = (int)new_size;
			
 
				-	if (ifp->if_broot)
			
 
				-		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			
 
				-			XFS_IFORK_SIZE(ip, whichfork));
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * This is called when the amount of space needed for if_data
			
 
				- * is increased or decreased.  The change in size is indicated by
			
 
				- * the number of bytes that need to be added or deleted in the
			
 
				- * byte_diff parameter.
			
 
				- *
			
 
				- * If the amount of space needed has decreased below the size of the
			
 
				- * inline buffer, then switch to using the inline buffer.  Otherwise,
			
 
				- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
			
 
				- * to what is needed.
			
 
				- *
			
 
				- * ip -- the inode whose if_data area is changing
			
 
				- * byte_diff -- the change in the number of bytes, positive or negative,
			
 
				- *	 requested for the if_data array.
			
 
				- */
			
 
				-void
			
 
				-xfs_idata_realloc(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	int		byte_diff,
			
 
				-	int		whichfork)
			
 
				-{
			
 
				-	xfs_ifork_t	*ifp;
			
 
				-	int		new_size;
			
 
				-	int		real_size;
			
 
				-
			
 
				-	if (byte_diff == 0) {
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	new_size = (int)ifp->if_bytes + byte_diff;
			
 
				-	ASSERT(new_size >= 0);
			
 
				-
			
 
				-	if (new_size == 0) {
			
 
				-		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			
 
				-			kmem_free(ifp->if_u1.if_data);
			
 
				-		}
			
 
				-		ifp->if_u1.if_data = NULL;
			
 
				-		real_size = 0;
			
 
				-	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
			
 
				-		/*
			
 
				-		 * If the valid extents/data can fit in if_inline_ext/data,
			
 
				-		 * copy them from the malloc'd vector and free it.
			
 
				-		 */
			
 
				-		if (ifp->if_u1.if_data == NULL) {
			
 
				-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
			
 
				-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			
 
				-			ASSERT(ifp->if_real_bytes != 0);
			
 
				-			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
			
 
				-			      new_size);
			
 
				-			kmem_free(ifp->if_u1.if_data);
			
 
				-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
			
 
				-		}
			
 
				-		real_size = 0;
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * Stuck with malloc/realloc.
			
 
				-		 * For inline data, the underlying buffer must be
			
 
				-		 * a multiple of 4 bytes in size so that it can be
			
 
				-		 * logged and stay on word boundaries.  We enforce
			
 
				-		 * that here.
			
 
				-		 */
			
 
				-		real_size = roundup(new_size, 4);
			
 
				-		if (ifp->if_u1.if_data == NULL) {
			
 
				-			ASSERT(ifp->if_real_bytes == 0);
			
 
				-			ifp->if_u1.if_data = kmem_alloc(real_size,
			
 
				-							KM_SLEEP | KM_NOFS);
			
 
				-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			
 
				-			/*
			
 
				-			 * Only do the realloc if the underlying size
			
 
				-			 * is really changing.
			
 
				-			 */
			
 
				-			if (ifp->if_real_bytes != real_size) {
			
 
				-				ifp->if_u1.if_data =
			
 
				-					kmem_realloc(ifp->if_u1.if_data,
			
 
				-							real_size,
			
 
				-							ifp->if_real_bytes,
			
 
				-							KM_SLEEP | KM_NOFS);
			
 
				-			}
			
 
				-		} else {
			
 
				-			ASSERT(ifp->if_real_bytes == 0);
			
 
				-			ifp->if_u1.if_data = kmem_alloc(real_size,
			
 
				-							KM_SLEEP | KM_NOFS);
			
 
				-			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
			
 
				-				ifp->if_bytes);
			
 
				-		}
			
 
				-	}
			
 
				-	ifp->if_real_bytes = real_size;
			
 
				-	ifp->if_bytes = new_size;
			
 
				-	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			
 
				-}
			
 
				-
			
 
				-void
			
 
				-xfs_idestroy_fork(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	int		whichfork)
			
 
				-{
			
 
				-	xfs_ifork_t	*ifp;
			
 
				-
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	if (ifp->if_broot != NULL) {
			
 
				-		kmem_free(ifp->if_broot);
			
 
				-		ifp->if_broot = NULL;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * If the format is local, then we can't have an extents
			
 
				-	 * array so just look for an inline data array.  If we're
			
 
				-	 * not local then we may or may not have an extents list,
			
 
				-	 * so check and free it up if we do.
			
 
				-	 */
			
 
				-	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
			
 
				-		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
			
 
				-		    (ifp->if_u1.if_data != NULL)) {
			
 
				-			ASSERT(ifp->if_real_bytes != 0);
			
 
				-			kmem_free(ifp->if_u1.if_data);
			
 
				-			ifp->if_u1.if_data = NULL;
			
 
				-			ifp->if_real_bytes = 0;
			
 
				-		}
			
 
				-	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
			
 
				-		   ((ifp->if_flags & XFS_IFEXTIREC) ||
			
 
				-		    ((ifp->if_u1.if_extents != NULL) &&
			
 
				-		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
			
 
				-		ASSERT(ifp->if_real_bytes != 0);
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-	}
			
 
				-	ASSERT(ifp->if_u1.if_extents == NULL ||
			
 
				-	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
			
 
				-	ASSERT(ifp->if_real_bytes == 0);
			
 
				-	if (whichfork == XFS_ATTR_FORK) {
			
 
				-		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
			
 
				-		ip->i_afp = NULL;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * This is called to unpin an inode.  The caller must have the inode locked
			
 
				  * in at least shared mode so that the buffer cannot be subsequently pinned
			
@@ -2402,162 +2370,471 @@ xfs_iunpin_wait(
 
				 		__xfs_iunpin_wait(ip);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * xfs_iextents_copy()
			
 
				- *
			
 
				- * This is called to copy the REAL extents (as opposed to the delayed
			
 
				- * allocation extents) from the inode into the given buffer.  It
			
 
				- * returns the number of bytes copied into the buffer.
			
 
				- *
			
 
				- * If there are no delayed allocation extents, then we can just
			
 
				- * memcpy() the extents into the buffer.  Otherwise, we need to
			
 
				- * examine each extent in turn and skip those which are delayed.
			
 
				- */
			
 
				 int
			
 
				-xfs_iextents_copy(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	xfs_bmbt_rec_t		*dp,
			
 
				-	int			whichfork)
			
 
				+xfs_remove(
			
 
				+	xfs_inode_t             *dp,
			
 
				+	struct xfs_name		*name,
			
 
				+	xfs_inode_t		*ip)
			
 
				 {
			
 
				-	int			copied;
			
 
				-	int			i;
			
 
				-	xfs_ifork_t		*ifp;
			
 
				-	int			nrecs;
			
 
				-	xfs_fsblock_t		start_block;
			
 
				+	xfs_mount_t		*mp = dp->i_mount;
			
 
				+	xfs_trans_t             *tp = NULL;
			
 
				+	int			is_dir = S_ISDIR(ip->i_d.di_mode);
			
 
				+	int                     error = 0;
			
 
				+	xfs_bmap_free_t         free_list;
			
 
				+	xfs_fsblock_t           first_block;
			
 
				+	int			cancel_flags;
			
 
				+	int			committed;
			
 
				+	int			link_zero;
			
 
				+	uint			resblks;
			
 
				+	uint			log_count;
			
 
				 
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
			
 
				-	ASSERT(ifp->if_bytes > 0);
			
 
				+	trace_xfs_remove(dp, name);
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				+		return XFS_ERROR(EIO);
			
 
				+
			
 
				+	error = xfs_qm_dqattach(dp, 0);
			
 
				+	if (error)
			
 
				+		goto std_return;
			
 
				 
			
 
				-	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
			
 
				-	ASSERT(nrecs > 0);
			
 
				+	error = xfs_qm_dqattach(ip, 0);
			
 
				+	if (error)
			
 
				+		goto std_return;
			
 
				+
			
 
				+	if (is_dir) {
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
			
 
				+		log_count = XFS_DEFAULT_LOG_COUNT;
			
 
				+	} else {
			
 
				+		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
			
 
				+		log_count = XFS_REMOVE_LOG_COUNT;
			
 
				+	}
			
 
				+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				 
			
 
				 	/*
			
 
				-	 * There are some delayed allocation extents in the
			
 
				-	 * inode, so copy the extents one at a time and skip
			
 
				-	 * the delayed ones.  There must be at least one
			
 
				-	 * non-delayed extent.
			
 
				+	 * We try to get the real space reservation first,
			
 
				+	 * allowing for directory btree deletion(s) implying
			
 
				+	 * possible bmap insert(s).  If we can't get the space
			
 
				+	 * reservation then we use 0 instead, and avoid the bmap
			
 
				+	 * btree insert(s) in the directory code by, if the bmap
			
 
				+	 * insert tries to happen, instead trimming the LAST
			
 
				+	 * block from the directory.
			
 
				 	 */
			
 
				-	copied = 0;
			
 
				-	for (i = 0; i < nrecs; i++) {
			
 
				-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			
 
				-		start_block = xfs_bmbt_get_startblock(ep);
			
 
				-		if (isnullstartblock(start_block)) {
			
 
				-			/*
			
 
				-			 * It's a delayed allocation extent, so skip it.
			
 
				-			 */
			
 
				-			continue;
			
 
				+	resblks = XFS_REMOVE_SPACE_RES(mp);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
			
 
				+	if (error == ENOSPC) {
			
 
				+		resblks = 0;
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
			
 
				+	}
			
 
				+	if (error) {
			
 
				+		ASSERT(error != ENOSPC);
			
 
				+		cancel_flags = 0;
			
 
				+		goto out_trans_cancel;
			
 
				+	}
			
 
				+
			
 
				+	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * If we're removing a directory perform some additional validation.
			
 
				+	 */
			
 
				+	if (is_dir) {
			
 
				+		ASSERT(ip->i_d.di_nlink >= 2);
			
 
				+		if (ip->i_d.di_nlink != 2) {
			
 
				+			error = XFS_ERROR(ENOTEMPTY);
			
 
				+			goto out_trans_cancel;
			
 
				 		}
			
 
				+		if (!xfs_dir_isempty(ip)) {
			
 
				+			error = XFS_ERROR(ENOTEMPTY);
			
 
				+			goto out_trans_cancel;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	xfs_bmap_init(&free_list, &first_block);
			
 
				+	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
			
 
				+					&first_block, &free_list, resblks);
			
 
				+	if (error) {
			
 
				+		ASSERT(error != ENOENT);
			
 
				+		goto out_bmap_cancel;
			
 
				+	}
			
 
				+	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+
			
 
				+	if (is_dir) {
			
 
				+		/*
			
 
				+		 * Drop the link from ip's "..".
			
 
				+		 */
			
 
				+		error = xfs_droplink(tp, dp);
			
 
				+		if (error)
			
 
				+			goto out_bmap_cancel;
			
 
				 
			
 
				-		/* Translate to on disk format */
			
 
				-		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
			
 
				-		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
			
 
				-		dp++;
			
 
				-		copied++;
			
 
				+		/*
			
 
				+		 * Drop the "." link from ip to self.
			
 
				+		 */
			
 
				+		error = xfs_droplink(tp, ip);
			
 
				+		if (error)
			
 
				+			goto out_bmap_cancel;
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * When removing a non-directory we need to log the parent
			
 
				+		 * inode here.  For a directory this is done implicitly
			
 
				+		 * by the xfs_droplink call for the ".." entry.
			
 
				+		 */
			
 
				+		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
			
 
				 	}
			
 
				-	ASSERT(copied != 0);
			
 
				-	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
			
 
				 
			
 
				-	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
			
 
				+	/*
			
 
				+	 * Drop the link from dp to ip.
			
 
				+	 */
			
 
				+	error = xfs_droplink(tp, ip);
			
 
				+	if (error)
			
 
				+		goto out_bmap_cancel;
			
 
				+
			
 
				+	/*
			
 
				+	 * Determine if this is the last link while
			
 
				+	 * we are in the transaction.
			
 
				+	 */
			
 
				+	link_zero = (ip->i_d.di_nlink == 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is a synchronous mount, make sure that the
			
 
				+	 * remove transaction goes to disk before returning to
			
 
				+	 * the user.
			
 
				+	 */
			
 
				+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+
			
 
				+	error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				+	if (error)
			
 
				+		goto out_bmap_cancel;
			
 
				+
			
 
				+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+	if (error)
			
 
				+		goto std_return;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are using filestreams, kill the stream association.
			
 
				+	 * If the file is still open it may get a new one but that
			
 
				+	 * will get killed on last close in xfs_close() so we don't
			
 
				+	 * have to worry about that.
			
 
				+	 */
			
 
				+	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
			
 
				+		xfs_filestream_deassociate(ip);
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+ out_bmap_cancel:
			
 
				+	xfs_bmap_cancel(&free_list);
			
 
				+	cancel_flags |= XFS_TRANS_ABORT;
			
 
				+ out_trans_cancel:
			
 
				+	xfs_trans_cancel(tp, cancel_flags);
			
 
				+ std_return:
			
 
				+	return error;
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Each of the following cases stores data into the same region
			
 
				- * of the on-disk inode, so only one of them can be valid at
			
 
				- * any given time. While it is possible to have conflicting formats
			
 
				- * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
			
 
				- * in EXTENTS format, this can only happen when the fork has
			
 
				- * changed formats after being modified but before being flushed.
			
 
				- * In these cases, the format always takes precedence, because the
			
 
				- * format indicates the current state of the fork.
			
 
				+ * Enter all inodes for a rename transaction into a sorted array.
			
 
				  */
			
 
				-/*ARGSUSED*/
			
 
				 STATIC void
			
 
				-xfs_iflush_fork(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	xfs_dinode_t		*dip,
			
 
				-	xfs_inode_log_item_t	*iip,
			
 
				-	int			whichfork,
			
 
				-	xfs_buf_t		*bp)
			
 
				-{
			
 
				-	char			*cp;
			
 
				-	xfs_ifork_t		*ifp;
			
 
				-	xfs_mount_t		*mp;
			
 
				-	static const short	brootflag[2] =
			
 
				-		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
			
 
				-	static const short	dataflag[2] =
			
 
				-		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
			
 
				-	static const short	extflag[2] =
			
 
				-		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
			
 
				-
			
 
				-	if (!iip)
			
 
				-		return;
			
 
				-	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				-	/*
			
 
				-	 * This can happen if we gave up in iformat in an error path,
			
 
				-	 * for the attribute fork.
			
 
				-	 */
			
 
				-	if (!ifp) {
			
 
				-		ASSERT(whichfork == XFS_ATTR_FORK);
			
 
				-		return;
			
 
				-	}
			
 
				-	cp = XFS_DFORK_PTR(dip, whichfork);
			
 
				-	mp = ip->i_mount;
			
 
				-	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
			
 
				-	case XFS_DINODE_FMT_LOCAL:
			
 
				-		if ((iip->ili_fields & dataflag[whichfork]) &&
			
 
				-		    (ifp->if_bytes > 0)) {
			
 
				-			ASSERT(ifp->if_u1.if_data != NULL);
			
 
				-			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			
 
				-			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				+xfs_sort_for_rename(
			
 
				+	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
			
 
				+	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
			
 
				+	xfs_inode_t	*ip1,	/* in: inode of old entry */
			
 
				+	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
			
 
				+				   already exists, NULL otherwise. */
			
 
				+	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
			
 
				+	int		*num_inodes)  /* out: number of inodes in array */
			
 
				+{
			
 
				+	xfs_inode_t		*temp;
			
 
				+	int			i, j;
			
 
				+
			
 
				+	/*
			
 
				+	 * i_tab contains a list of pointers to inodes.  We initialize
			
 
				+	 * the table here & we'll sort it.  We will then use it to
			
 
				+	 * order the acquisition of the inode locks.
			
 
				+	 *
			
 
				+	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
			
 
				+	 */
			
 
				+	i_tab[0] = dp1;
			
 
				+	i_tab[1] = dp2;
			
 
				+	i_tab[2] = ip1;
			
 
				+	if (ip2) {
			
 
				+		*num_inodes = 4;
			
 
				+		i_tab[3] = ip2;
			
 
				+	} else {
			
 
				+		*num_inodes = 3;
			
 
				+		i_tab[3] = NULL;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Sort the elements via bubble sort.  (Remember, there are at
			
 
				+	 * most 4 elements to sort, so this is adequate.)
			
 
				+	 */
			
 
				+	for (i = 0; i < *num_inodes; i++) {
			
 
				+		for (j = 1; j < *num_inodes; j++) {
			
 
				+			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
			
 
				+				temp = i_tab[j];
			
 
				+				i_tab[j] = i_tab[j-1];
			
 
				+				i_tab[j-1] = temp;
			
 
				+			}
			
 
				 		}
			
 
				-		break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * xfs_rename
			
 
				+ */
			
 
				+int
			
 
				+xfs_rename(
			
 
				+	xfs_inode_t	*src_dp,
			
 
				+	struct xfs_name	*src_name,
			
 
				+	xfs_inode_t	*src_ip,
			
 
				+	xfs_inode_t	*target_dp,
			
 
				+	struct xfs_name	*target_name,
			
 
				+	xfs_inode_t	*target_ip)
			
 
				+{
			
 
				+	xfs_trans_t	*tp = NULL;
			
 
				+	xfs_mount_t	*mp = src_dp->i_mount;
			
 
				+	int		new_parent;		/* moving to a new dir */
			
 
				+	int		src_is_directory;	/* src_name is a directory */
			
 
				+	int		error;
			
 
				+	xfs_bmap_free_t free_list;
			
 
				+	xfs_fsblock_t   first_block;
			
 
				+	int		cancel_flags;
			
 
				+	int		committed;
			
 
				+	xfs_inode_t	*inodes[4];
			
 
				+	int		spaceres;
			
 
				+	int		num_inodes;
			
 
				+
			
 
				+	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
			
 
				+
			
 
				+	new_parent = (src_dp != target_dp);
			
 
				+	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
			
 
				+
			
 
				+	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
			
 
				+				inodes, &num_inodes);
			
 
				+
			
 
				+	xfs_bmap_init(&free_list, &first_block);
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
			
 
				+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				+	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
			
 
				+	if (error == ENOSPC) {
			
 
				+		spaceres = 0;
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
			
 
				+	}
			
 
				+	if (error) {
			
 
				+		xfs_trans_cancel(tp, 0);
			
 
				+		goto std_return;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Attach the dquots to the inodes
			
 
				+	 */
			
 
				+	error = xfs_qm_vop_rename_dqattach(inodes);
			
 
				+	if (error) {
			
 
				+		xfs_trans_cancel(tp, cancel_flags);
			
 
				+		goto std_return;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Lock all the participating inodes. Depending upon whether
			
 
				+	 * the target_name exists in the target directory, and
			
 
				+	 * whether the target directory is the same as the source
			
 
				+	 * directory, we can lock from 2 to 4 inodes.
			
 
				+	 */
			
 
				+	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * Join all the inodes to the transaction. From this point on,
			
 
				+	 * we can rely on either trans_commit or trans_cancel to unlock
			
 
				+	 * them.
			
 
				+	 */
			
 
				+	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
			
 
				+	if (new_parent)
			
 
				+		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
			
 
				+	if (target_ip)
			
 
				+		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are using project inheritance, we only allow renames
			
 
				+	 * into our tree when the project IDs are the same; else the
			
 
				+	 * tree quota mechanism would be circumvented.
			
 
				+	 */
			
 
				+	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
			
 
				+		     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
			
 
				+		error = XFS_ERROR(EXDEV);
			
 
				+		goto error_return;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Set up the target.
			
 
				+	 */
			
 
				+	if (target_ip == NULL) {
			
 
				+		/*
			
 
				+		 * If there's no space reservation, check the entry will
			
 
				+		 * fit before actually inserting it.
			
 
				+		 */
			
 
				+		error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
			
 
				+		if (error)
			
 
				+			goto error_return;
			
 
				+		/*
			
 
				+		 * If target does not exist and the rename crosses
			
 
				+		 * directories, adjust the target directory link count
			
 
				+		 * to account for the ".." reference from the new entry.
			
 
				+		 */
			
 
				+		error = xfs_dir_createname(tp, target_dp, target_name,
			
 
				+						src_ip->i_ino, &first_block,
			
 
				+						&free_list, spaceres);
			
 
				+		if (error == ENOSPC)
			
 
				+			goto error_return;
			
 
				+		if (error)
			
 
				+			goto abort_return;
			
 
				+
			
 
				+		xfs_trans_ichgtime(tp, target_dp,
			
 
				+					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				 
			
 
				-	case XFS_DINODE_FMT_EXTENTS:
			
 
				-		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
			
 
				-		       !(iip->ili_fields & extflag[whichfork]));
			
 
				-		if ((iip->ili_fields & extflag[whichfork]) &&
			
 
				-		    (ifp->if_bytes > 0)) {
			
 
				-			ASSERT(xfs_iext_get_ext(ifp, 0));
			
 
				-			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
			
 
				-			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
			
 
				-				whichfork);
			
 
				+		if (new_parent && src_is_directory) {
			
 
				+			error = xfs_bumplink(tp, target_dp);
			
 
				+			if (error)
			
 
				+				goto abort_return;
			
 
				 		}
			
 
				-		break;
			
 
				+	} else { /* target_ip != NULL */
			
 
				+		/*
			
 
				+		 * If target exists and it's a directory, check that both
			
 
				+		 * target and source are directories and that target can be
			
 
				+		 * destroyed, or that neither is a directory.
			
 
				+		 */
			
 
				+		if (S_ISDIR(target_ip->i_d.di_mode)) {
			
 
				+			/*
			
 
				+			 * Make sure target dir is empty.
			
 
				+			 */
			
 
				+			if (!(xfs_dir_isempty(target_ip)) ||
			
 
				+			    (target_ip->i_d.di_nlink > 2)) {
			
 
				+				error = XFS_ERROR(EEXIST);
			
 
				+				goto error_return;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Link the source inode under the target name.
			
 
				+		 * If the source inode is a directory and we are moving
			
 
				+		 * it across directories, its ".." entry will be
			
 
				+		 * inconsistent until we replace that down below.
			
 
				+		 *
			
 
				+		 * In case there is already an entry with the same
			
 
				+		 * name at the destination directory, remove it first.
			
 
				+		 */
			
 
				+		error = xfs_dir_replace(tp, target_dp, target_name,
			
 
				+					src_ip->i_ino,
			
 
				+					&first_block, &free_list, spaceres);
			
 
				+		if (error)
			
 
				+			goto abort_return;
			
 
				+
			
 
				+		xfs_trans_ichgtime(tp, target_dp,
			
 
				+					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+
			
 
				+		/*
			
 
				+		 * Decrement the link count on the target since the target
			
 
				+		 * dir no longer points to it.
			
 
				+		 */
			
 
				+		error = xfs_droplink(tp, target_ip);
			
 
				+		if (error)
			
 
				+			goto abort_return;
			
 
				+
			
 
				+		if (src_is_directory) {
			
 
				+			/*
			
 
				+			 * Drop the link from the old "." entry.
			
 
				+			 */
			
 
				+			error = xfs_droplink(tp, target_ip);
			
 
				+			if (error)
			
 
				+				goto abort_return;
			
 
				+		}
			
 
				+	} /* target_ip != NULL */
			
 
				+
			
 
				+	/*
			
 
				+	 * Remove the source.
			
 
				+	 */
			
 
				+	if (new_parent && src_is_directory) {
			
 
				+		/*
			
 
				+		 * Rewrite the ".." entry to point to the new
			
 
				+		 * directory.
			
 
				+		 */
			
 
				+		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
			
 
				+					target_dp->i_ino,
			
 
				+					&first_block, &free_list, spaceres);
			
 
				+		ASSERT(error != EEXIST);
			
 
				+		if (error)
			
 
				+			goto abort_return;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * We always want to hit the ctime on the source inode.
			
 
				+	 *
			
 
				+	 * This isn't strictly required by the standards since the source
			
 
				+	 * inode isn't really being changed, but old unix file systems did
			
 
				+	 * it and some incremental backup programs won't work without it.
			
 
				+	 */
			
 
				+	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
			
 
				+	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
			
 
				+
			
 
				+	/*
			
 
				+	 * Adjust the link count on src_dp.  This is necessary when
			
 
				+	 * renaming a directory, either within one parent when
			
 
				+	 * the target existed, or across two parent directories.
			
 
				+	 */
			
 
				+	if (src_is_directory && (new_parent || target_ip != NULL)) {
			
 
				+
			
 
				+		/*
			
 
				+		 * Decrement link count on src_directory since the
			
 
				+		 * entry that's moved no longer points to it.
			
 
				+		 */
			
 
				+		error = xfs_droplink(tp, src_dp);
			
 
				+		if (error)
			
 
				+			goto abort_return;
			
 
				+	}
			
 
				 
			
 
				-	case XFS_DINODE_FMT_BTREE:
			
 
				-		if ((iip->ili_fields & brootflag[whichfork]) &&
			
 
				-		    (ifp->if_broot_bytes > 0)) {
			
 
				-			ASSERT(ifp->if_broot != NULL);
			
 
				-			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			
 
				-			        XFS_IFORK_SIZE(ip, whichfork));
			
 
				-			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
			
 
				-				(xfs_bmdr_block_t *)cp,
			
 
				-				XFS_DFORK_SIZE(dip, mp, whichfork));
			
 
				-		}
			
 
				-		break;
			
 
				+	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
			
 
				+					&first_block, &free_list, spaceres);
			
 
				+	if (error)
			
 
				+		goto abort_return;
			
 
				 
			
 
				-	case XFS_DINODE_FMT_DEV:
			
 
				-		if (iip->ili_fields & XFS_ILOG_DEV) {
			
 
				-			ASSERT(whichfork == XFS_DATA_FORK);
			
 
				-			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
			
 
				-		}
			
 
				-		break;
			
 
				+	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				+	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
			
 
				+	if (new_parent)
			
 
				+		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
			
 
				 
			
 
				-	case XFS_DINODE_FMT_UUID:
			
 
				-		if (iip->ili_fields & XFS_ILOG_UUID) {
			
 
				-			ASSERT(whichfork == XFS_DATA_FORK);
			
 
				-			memcpy(XFS_DFORK_DPTR(dip),
			
 
				-			       &ip->i_df.if_u2.if_uuid,
			
 
				-			       sizeof(uuid_t));
			
 
				-		}
			
 
				-		break;
			
 
				+	/*
			
 
				+	 * If this is a synchronous mount, make sure that the
			
 
				+	 * rename transaction goes to disk before returning to
			
 
				+	 * the user.
			
 
				+	 */
			
 
				+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
			
 
				+		xfs_trans_set_sync(tp);
			
 
				+	}
			
 
				 
			
 
				-	default:
			
 
				-		ASSERT(0);
			
 
				-		break;
			
 
				+	error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				+	if (error) {
			
 
				+		xfs_bmap_cancel(&free_list);
			
 
				+		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
			
 
				+				 XFS_TRANS_ABORT));
			
 
				+		goto std_return;
			
 
				 	}
			
 
				+
			
 
				+	/*
			
 
				+	 * trans_commit will unlock src_ip, target_ip & decrement
			
 
				+	 * the vnode references.
			
 
				+	 */
			
 
				+	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				+
			
 
				+ abort_return:
			
 
				+	cancel_flags |= XFS_TRANS_ABORT;
			
 
				+ error_return:
			
 
				+	xfs_bmap_cancel(&free_list);
			
 
				+	xfs_trans_cancel(tp, cancel_flags);
			
 
				+ std_return:
			
 
				+	return error;
			
 
				 }
			
 
				 
			
 
				 STATIC int
			
@@ -2816,7 +3093,6 @@ abort_out:
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 STATIC int
			
 
				 xfs_iflush_int(
			
 
				 	struct xfs_inode	*ip,
			
@@ -3004,1072 +3280,3 @@ xfs_iflush_int(
 
				 corrupt_out:
			
 
				 	return XFS_ERROR(EFSCORRUPTED);
			
 
				 }
			
 
				-
			
 
				-/*
			
 
				- * Return a pointer to the extent record at file index idx.
			
 
				- */
			
 
				-xfs_bmbt_rec_host_t *
			
 
				-xfs_iext_get_ext(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	idx)		/* index of target extent */
			
 
				-{
			
 
				-	ASSERT(idx >= 0);
			
 
				-	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
			
 
				-
			
 
				-	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
			
 
				-		return ifp->if_u1.if_ext_irec->er_extbuf;
			
 
				-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-		xfs_ext_irec_t	*erp;		/* irec pointer */
			
 
				-		int		erp_idx = 0;	/* irec index */
			
 
				-		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
			
 
				-
			
 
				-		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
			
 
				-		return &erp->er_extbuf[page_idx];
			
 
				-	} else if (ifp->if_bytes) {
			
 
				-		return &ifp->if_u1.if_extents[idx];
			
 
				-	} else {
			
 
				-		return NULL;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Insert new item(s) into the extent records for incore inode
			
 
				- * fork 'ifp'.  'count' new items are inserted at index 'idx'.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_insert(
			
 
				-	xfs_inode_t	*ip,		/* incore inode pointer */
			
 
				-	xfs_extnum_t	idx,		/* starting index of new items */
			
 
				-	xfs_extnum_t	count,		/* number of inserted items */
			
 
				-	xfs_bmbt_irec_t	*new,		/* items to insert */
			
 
				-	int		state)		/* type of extent conversion */
			
 
				-{
			
 
				-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
			
 
				-	xfs_extnum_t	i;		/* extent record index */
			
 
				-
			
 
				-	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
			
 
				-	xfs_iext_add(ifp, idx, count);
			
 
				-	for (i = idx; i < idx + count; i++, new++)
			
 
				-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called when the amount of space required for incore file
			
 
				- * extents needs to be increased. The ext_diff parameter stores the
			
 
				- * number of new extents being added and the idx parameter contains
			
 
				- * the extent index where the new extents will be added. If the new
			
 
				- * extents are being appended, then we just need to (re)allocate and
			
 
				- * initialize the space. Otherwise, if the new extents are being
			
 
				- * inserted into the middle of the existing entries, a bit more work
			
 
				- * is required to make room for the new extents to be inserted. The
			
 
				- * caller is responsible for filling in the new extent entries upon
			
 
				- * return.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_add(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	idx,		/* index to begin adding exts */
			
 
				-	int		ext_diff)	/* number of extents to add */
			
 
				-{
			
 
				-	int		byte_diff;	/* new bytes being added */
			
 
				-	int		new_size;	/* size of extents after adding */
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	ASSERT((idx >= 0) && (idx <= nextents));
			
 
				-	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
			
 
				-	new_size = ifp->if_bytes + byte_diff;
			
 
				-	/*
			
 
				-	 * If the new number of extents (nextents + ext_diff)
			
 
				-	 * fits inside the inode, then continue to use the inline
			
 
				-	 * extent buffer.
			
 
				-	 */
			
 
				-	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
			
 
				-		if (idx < nextents) {
			
 
				-			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
			
 
				-				&ifp->if_u2.if_inline_ext[idx],
			
 
				-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			
 
				-			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
			
 
				-		}
			
 
				-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
			
 
				-		ifp->if_real_bytes = 0;
			
 
				-	}
			
 
				-	/*
			
 
				-	 * Otherwise use a linear (direct) extent list.
			
 
				-	 * If the extents are currently inside the inode,
			
 
				-	 * xfs_iext_realloc_direct will switch us from
			
 
				-	 * inline to direct extent allocation mode.
			
 
				-	 */
			
 
				-	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
			
 
				-		xfs_iext_realloc_direct(ifp, new_size);
			
 
				-		if (idx < nextents) {
			
 
				-			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
			
 
				-				&ifp->if_u1.if_extents[idx],
			
 
				-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			
 
				-			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
			
 
				-		}
			
 
				-	}
			
 
				-	/* Indirection array */
			
 
				-	else {
			
 
				-		xfs_ext_irec_t	*erp;
			
 
				-		int		erp_idx = 0;
			
 
				-		int		page_idx = idx;
			
 
				-
			
 
				-		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
			
 
				-		if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
			
 
				-		} else {
			
 
				-			xfs_iext_irec_init(ifp);
			
 
				-			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-			erp = ifp->if_u1.if_ext_irec;
			
 
				-		}
			
 
				-		/* Extents fit in target extent page */
			
 
				-		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
			
 
				-			if (page_idx < erp->er_extcount) {
			
 
				-				memmove(&erp->er_extbuf[page_idx + ext_diff],
			
 
				-					&erp->er_extbuf[page_idx],
			
 
				-					(erp->er_extcount - page_idx) *
			
 
				-					sizeof(xfs_bmbt_rec_t));
			
 
				-				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
			
 
				-			}
			
 
				-			erp->er_extcount += ext_diff;
			
 
				-			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				-		}
			
 
				-		/* Insert a new extent page */
			
 
				-		else if (erp) {
			
 
				-			xfs_iext_add_indirect_multi(ifp,
			
 
				-				erp_idx, page_idx, ext_diff);
			
 
				-		}
			
 
				-		/*
			
 
				-		 * If extent(s) are being appended to the last page in
			
 
				-		 * the indirection array and the new extent(s) don't fit
			
 
				-		 * in the page, then erp is NULL and erp_idx is set to
			
 
				-		 * the next index needed in the indirection array.
			
 
				-		 */
			
 
				-		else {
			
 
				-			int	count = ext_diff;
			
 
				-
			
 
				-			while (count) {
			
 
				-				erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				-				erp->er_extcount = count;
			
 
				-				count -= MIN(count, (int)XFS_LINEAR_EXTS);
			
 
				-				if (count) {
			
 
				-					erp_idx++;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	ifp->if_bytes = new_size;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called when incore extents are being added to the indirection
			
 
				- * array and the new extents do not fit in the target extent list. The
			
 
				- * erp_idx parameter contains the irec index for the target extent list
			
 
				- * in the indirection array, and the idx parameter contains the extent
			
 
				- * index within the list. The number of extents being added is stored
			
 
				- * in the count parameter.
			
 
				- *
			
 
				- *    |-------|   |-------|
			
 
				- *    |       |   |       |    idx - number of extents before idx
			
 
				- *    |  idx  |   | count |
			
 
				- *    |       |   |       |    count - number of extents being inserted at idx
			
 
				- *    |-------|   |-------|
			
 
				- *    | count |   | nex2  |    nex2 - number of extents after idx + count
			
 
				- *    |-------|   |-------|
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_add_indirect_multi(
			
 
				-	xfs_ifork_t	*ifp,			/* inode fork pointer */
			
 
				-	int		erp_idx,		/* target extent irec index */
			
 
				-	xfs_extnum_t	idx,			/* index within target list */
			
 
				-	int		count)			/* new extents being added */
			
 
				-{
			
 
				-	int		byte_diff;		/* new bytes being added */
			
 
				-	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
			
 
				-	xfs_extnum_t	ext_diff;		/* number of extents to add */
			
 
				-	xfs_extnum_t	ext_cnt;		/* new extents still needed */
			
 
				-	xfs_extnum_t	nex2;			/* extents after idx + count */
			
 
				-	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
			
 
				-	int		nlists;			/* number of irec's (lists) */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-	nex2 = erp->er_extcount - idx;
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-
			
 
				-	/*
			
 
				-	 * Save second part of target extent list
			
 
				-	 * (all extents past */
			
 
				-	if (nex2) {
			
 
				-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
			
 
				-		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
			
 
				-		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
			
 
				-		erp->er_extcount -= nex2;
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
			
 
				-		memset(&erp->er_extbuf[idx], 0, byte_diff);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Add the new extents to the end of the target
			
 
				-	 * list, then allocate new irec record(s) and
			
 
				-	 * extent buffer(s) as needed to store the rest
			
 
				-	 * of the new extents.
			
 
				-	 */
			
 
				-	ext_cnt = count;
			
 
				-	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
			
 
				-	if (ext_diff) {
			
 
				-		erp->er_extcount += ext_diff;
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				-		ext_cnt -= ext_diff;
			
 
				-	}
			
 
				-	while (ext_cnt) {
			
 
				-		erp_idx++;
			
 
				-		erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				-		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
			
 
				-		erp->er_extcount = ext_diff;
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				-		ext_cnt -= ext_diff;
			
 
				-	}
			
 
				-
			
 
				-	/* Add nex2 extents back to indirection array */
			
 
				-	if (nex2) {
			
 
				-		xfs_extnum_t	ext_avail;
			
 
				-		int		i;
			
 
				-
			
 
				-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
			
 
				-		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
			
 
				-		i = 0;
			
 
				-		/*
			
 
				-		 * If nex2 extents fit in the current page, append
			
 
				-		 * nex2_ep after the new extents.
			
 
				-		 */
			
 
				-		if (nex2 <= ext_avail) {
			
 
				-			i = erp->er_extcount;
			
 
				-		}
			
 
				-		/*
			
 
				-		 * Otherwise, check if space is available in the
			
 
				-		 * next page.
			
 
				-		 */
			
 
				-		else if ((erp_idx < nlists - 1) &&
			
 
				-			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			
 
				-			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			
 
				-			erp_idx++;
			
 
				-			erp++;
			
 
				-			/* Create a hole for nex2 extents */
			
 
				-			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
			
 
				-				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
			
 
				-		}
			
 
				-		/*
			
 
				-		 * Final choice, create a new extent page for
			
 
				-		 * nex2 extents.
			
 
				-		 */
			
 
				-		else {
			
 
				-			erp_idx++;
			
 
				-			erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				-		}
			
 
				-		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
			
 
				-		kmem_free(nex2_ep);
			
 
				-		erp->er_extcount += nex2;
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called when the amount of space required for incore file
			
 
				- * extents needs to be decreased. The ext_diff parameter stores the
			
 
				- * number of extents to be removed and the idx parameter contains
			
 
				- * the extent index where the extents will be removed from.
			
 
				- *
			
 
				- * If the amount of space needed has decreased below the linear
			
 
				- * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
			
 
				- * extent array.  Otherwise, use kmem_realloc() to adjust the
			
 
				- * size to what is needed.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_remove(
			
 
				-	xfs_inode_t	*ip,		/* incore inode pointer */
			
 
				-	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				-	int		ext_diff,	/* number of extents to remove */
			
 
				-	int		state)		/* type of extent conversion */
			
 
				-{
			
 
				-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-	int		new_size;	/* size of extents after removal */
			
 
				-
			
 
				-	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
			
 
				-
			
 
				-	ASSERT(ext_diff > 0);
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
			
 
				-
			
 
				-	if (new_size == 0) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-		xfs_iext_remove_indirect(ifp, idx, ext_diff);
			
 
				-	} else if (ifp->if_real_bytes) {
			
 
				-		xfs_iext_remove_direct(ifp, idx, ext_diff);
			
 
				-	} else {
			
 
				-		xfs_iext_remove_inline(ifp, idx, ext_diff);
			
 
				-	}
			
 
				-	ifp->if_bytes = new_size;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This removes ext_diff extents from the inline buffer, beginning
			
 
				- * at extent index idx.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_remove_inline(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				-	int		ext_diff)	/* number of extents to remove */
			
 
				-{
			
 
				-	int		nextents;	/* number of extents in file */
			
 
				-
			
 
				-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				-	ASSERT(idx < XFS_INLINE_EXTS);
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	ASSERT(((nextents - ext_diff) > 0) &&
			
 
				-		(nextents - ext_diff) < XFS_INLINE_EXTS);
			
 
				-
			
 
				-	if (idx + ext_diff < nextents) {
			
 
				-		memmove(&ifp->if_u2.if_inline_ext[idx],
			
 
				-			&ifp->if_u2.if_inline_ext[idx + ext_diff],
			
 
				-			(nextents - (idx + ext_diff)) *
			
 
				-			 sizeof(xfs_bmbt_rec_t));
			
 
				-		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
			
 
				-			0, ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				-	} else {
			
 
				-		memset(&ifp->if_u2.if_inline_ext[idx], 0,
			
 
				-			ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This removes ext_diff extents from a linear (direct) extent list,
			
 
				- * beginning at extent index idx. If the extents are being removed
			
 
				- * from the end of the list (ie. truncate) then we just need to re-
			
 
				- * allocate the list to remove the extra space. Otherwise, if the
			
 
				- * extents are being removed from the middle of the existing extent
			
 
				- * entries, then we first need to move the extent records beginning
			
 
				- * at idx + ext_diff up in the list to overwrite the records being
			
 
				- * removed, then remove the extra space via kmem_realloc.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_remove_direct(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				-	int		ext_diff)	/* number of extents to remove */
			
 
				-{
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-	int		new_size;	/* size of extents after removal */
			
 
				-
			
 
				-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				-	new_size = ifp->if_bytes -
			
 
				-		(ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-
			
 
				-	if (new_size == 0) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-		return;
			
 
				-	}
			
 
				-	/* Move extents up in the list (if needed) */
			
 
				-	if (idx + ext_diff < nextents) {
			
 
				-		memmove(&ifp->if_u1.if_extents[idx],
			
 
				-			&ifp->if_u1.if_extents[idx + ext_diff],
			
 
				-			(nextents - (idx + ext_diff)) *
			
 
				-			 sizeof(xfs_bmbt_rec_t));
			
 
				-	}
			
 
				-	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
			
 
				-		0, ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				-	/*
			
 
				-	 * Reallocate the direct extent list. If the extents
			
 
				-	 * will fit inside the inode then xfs_iext_realloc_direct
			
 
				-	 * will switch from direct to inline extent allocation
			
 
				-	 * mode for us.
			
 
				-	 */
			
 
				-	xfs_iext_realloc_direct(ifp, new_size);
			
 
				-	ifp->if_bytes = new_size;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called when incore extents are being removed from the
			
 
				- * indirection array and the extents being removed span multiple extent
			
 
				- * buffers. The idx parameter contains the file extent index where we
			
 
				- * want to begin removing extents, and the count parameter contains
			
 
				- * how many extents need to be removed.
			
 
				- *
			
 
				- *    |-------|   |-------|
			
 
				- *    | nex1  |   |       |    nex1 - number of extents before idx
			
 
				- *    |-------|   | count |
			
 
				- *    |       |   |       |    count - number of extents being removed at idx
			
 
				- *    | count |   |-------|
			
 
				- *    |       |   | nex2  |    nex2 - number of extents after idx + count
			
 
				- *    |-------|   |-------|
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_remove_indirect(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	idx,		/* index to begin removing extents */
			
 
				-	int		count)		/* number of extents to remove */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				-	int		erp_idx = 0;	/* indirection array index */
			
 
				-	xfs_extnum_t	ext_cnt;	/* extents left to remove */
			
 
				-	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
			
 
				-	xfs_extnum_t	nex1;		/* number of extents before idx */
			
 
				-	xfs_extnum_t	nex2;		/* extents after idx + count */
			
 
				-	int		page_idx = idx;	/* index in target extent list */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
			
 
				-	ASSERT(erp != NULL);
			
 
				-	nex1 = page_idx;
			
 
				-	ext_cnt = count;
			
 
				-	while (ext_cnt) {
			
 
				-		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
			
 
				-		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
			
 
				-		/*
			
 
				-		 * Check for deletion of entire list;
			
 
				-		 * xfs_iext_irec_remove() updates extent offsets.
			
 
				-		 */
			
 
				-		if (ext_diff == erp->er_extcount) {
			
 
				-			xfs_iext_irec_remove(ifp, erp_idx);
			
 
				-			ext_cnt -= ext_diff;
			
 
				-			nex1 = 0;
			
 
				-			if (ext_cnt) {
			
 
				-				ASSERT(erp_idx < ifp->if_real_bytes /
			
 
				-					XFS_IEXT_BUFSZ);
			
 
				-				erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-				nex1 = 0;
			
 
				-				continue;
			
 
				-			} else {
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		/* Move extents up (if needed) */
			
 
				-		if (nex2) {
			
 
				-			memmove(&erp->er_extbuf[nex1],
			
 
				-				&erp->er_extbuf[nex1 + ext_diff],
			
 
				-				nex2 * sizeof(xfs_bmbt_rec_t));
			
 
				-		}
			
 
				-		/* Zero out rest of page */
			
 
				-		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			
 
				-			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
			
 
				-		/* Update remaining counters */
			
 
				-		erp->er_extcount -= ext_diff;
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
			
 
				-		ext_cnt -= ext_diff;
			
 
				-		nex1 = 0;
			
 
				-		erp_idx++;
			
 
				-		erp++;
			
 
				-	}
			
 
				-	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
			
 
				-	xfs_iext_irec_compact(ifp);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Create, destroy, or resize a linear (direct) block of extents.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_realloc_direct(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		new_size)	/* new size of extents */
			
 
				-{
			
 
				-	int		rnew_size;	/* real new size of extents */
			
 
				-
			
 
				-	rnew_size = new_size;
			
 
				-
			
 
				-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
			
 
				-		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
			
 
				-		 (new_size != ifp->if_real_bytes)));
			
 
				-
			
 
				-	/* Free extent records */
			
 
				-	if (new_size == 0) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-	}
			
 
				-	/* Resize direct extent list and zero any new bytes */
			
 
				-	else if (ifp->if_real_bytes) {
			
 
				-		/* Check if extents will fit inside the inode */
			
 
				-		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
			
 
				-			xfs_iext_direct_to_inline(ifp, new_size /
			
 
				-				(uint)sizeof(xfs_bmbt_rec_t));
			
 
				-			ifp->if_bytes = new_size;
			
 
				-			return;
			
 
				-		}
			
 
				-		if (!is_power_of_2(new_size)){
			
 
				-			rnew_size = roundup_pow_of_two(new_size);
			
 
				-		}
			
 
				-		if (rnew_size != ifp->if_real_bytes) {
			
 
				-			ifp->if_u1.if_extents =
			
 
				-				kmem_realloc(ifp->if_u1.if_extents,
			
 
				-						rnew_size,
			
 
				-						ifp->if_real_bytes, KM_NOFS);
			
 
				-		}
			
 
				-		if (rnew_size > ifp->if_real_bytes) {
			
 
				-			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
			
 
				-				(uint)sizeof(xfs_bmbt_rec_t)], 0,
			
 
				-				rnew_size - ifp->if_real_bytes);
			
 
				-		}
			
 
				-	}
			
 
				-	/*
			
 
				-	 * Switch from the inline extent buffer to a direct
			
 
				-	 * extent list. Be sure to include the inline extent
			
 
				-	 * bytes in new_size.
			
 
				-	 */
			
 
				-	else {
			
 
				-		new_size += ifp->if_bytes;
			
 
				-		if (!is_power_of_2(new_size)) {
			
 
				-			rnew_size = roundup_pow_of_two(new_size);
			
 
				-		}
			
 
				-		xfs_iext_inline_to_direct(ifp, rnew_size);
			
 
				-	}
			
 
				-	ifp->if_real_bytes = rnew_size;
			
 
				-	ifp->if_bytes = new_size;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Switch from linear (direct) extent records to inline buffer.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_direct_to_inline(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	nextents)	/* number of extents in file */
			
 
				-{
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
			
 
				-	ASSERT(nextents <= XFS_INLINE_EXTS);
			
 
				-	/*
			
 
				-	 * The inline buffer was zeroed when we switched
			
 
				-	 * from inline to direct extent allocation mode,
			
 
				-	 * so we don't need to clear it here.
			
 
				-	 */
			
 
				-	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
			
 
				-		nextents * sizeof(xfs_bmbt_rec_t));
			
 
				-	kmem_free(ifp->if_u1.if_extents);
			
 
				-	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
			
 
				-	ifp->if_real_bytes = 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Switch from inline buffer to linear (direct) extent records.
			
 
				- * new_size should already be rounded up to the next power of 2
			
 
				- * by the caller (when appropriate), so use new_size as it is.
			
 
				- * However, since new_size may be rounded up, we can't update
			
 
				- * if_bytes here. It is the caller's responsibility to update
			
 
				- * if_bytes upon return.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_inline_to_direct(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		new_size)	/* number of extents in file */
			
 
				-{
			
 
				-	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
			
 
				-	memset(ifp->if_u1.if_extents, 0, new_size);
			
 
				-	if (ifp->if_bytes) {
			
 
				-		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
			
 
				-			ifp->if_bytes);
			
 
				-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
			
 
				-			sizeof(xfs_bmbt_rec_t));
			
 
				-	}
			
 
				-	ifp->if_real_bytes = new_size;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Resize an extent indirection array to new_size bytes.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_iext_realloc_indirect(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		new_size)	/* new indirection array size */
			
 
				-{
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-	int		size;		/* current indirection array size */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	size = nlists * sizeof(xfs_ext_irec_t);
			
 
				-	ASSERT(ifp->if_real_bytes);
			
 
				-	ASSERT((new_size >= 0) && (new_size != size));
			
 
				-	if (new_size == 0) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-	} else {
			
 
				-		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
			
 
				-			kmem_realloc(ifp->if_u1.if_ext_irec,
			
 
				-				new_size, size, KM_NOFS);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Switch from indirection array to linear (direct) extent allocations.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_iext_indirect_to_direct(
			
 
				-	 xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				-{
			
 
				-	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-	int		size;		/* size of file extents */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	ASSERT(nextents <= XFS_LINEAR_EXTS);
			
 
				-	size = nextents * sizeof(xfs_bmbt_rec_t);
			
 
				-
			
 
				-	xfs_iext_irec_compact_pages(ifp);
			
 
				-	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
			
 
				-
			
 
				-	ep = ifp->if_u1.if_ext_irec->er_extbuf;
			
 
				-	kmem_free(ifp->if_u1.if_ext_irec);
			
 
				-	ifp->if_flags &= ~XFS_IFEXTIREC;
			
 
				-	ifp->if_u1.if_extents = ep;
			
 
				-	ifp->if_bytes = size;
			
 
				-	if (nextents < XFS_LINEAR_EXTS) {
			
 
				-		xfs_iext_realloc_direct(ifp, size);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Free incore file extents.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_destroy(
			
 
				-	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				-{
			
 
				-	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-		int	erp_idx;
			
 
				-		int	nlists;
			
 
				-
			
 
				-		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
			
 
				-			xfs_iext_irec_remove(ifp, erp_idx);
			
 
				-		}
			
 
				-		ifp->if_flags &= ~XFS_IFEXTIREC;
			
 
				-	} else if (ifp->if_real_bytes) {
			
 
				-		kmem_free(ifp->if_u1.if_extents);
			
 
				-	} else if (ifp->if_bytes) {
			
 
				-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
			
 
				-			sizeof(xfs_bmbt_rec_t));
			
 
				-	}
			
 
				-	ifp->if_u1.if_extents = NULL;
			
 
				-	ifp->if_real_bytes = 0;
			
 
				-	ifp->if_bytes = 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Return a pointer to the extent record for file system block bno.
			
 
				- */
			
 
				-xfs_bmbt_rec_host_t *			/* pointer to found extent record */
			
 
				-xfs_iext_bno_to_ext(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_fileoff_t	bno,		/* block number to search for */
			
 
				-	xfs_extnum_t	*idxp)		/* index of target extent */
			
 
				-{
			
 
				-	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
			
 
				-	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
			
 
				-	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
			
 
				-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
			
 
				-	int		high;		/* upper boundary in search */
			
 
				-	xfs_extnum_t	idx = 0;	/* index of target extent */
			
 
				-	int		low;		/* lower boundary in search */
			
 
				-	xfs_extnum_t	nextents;	/* number of file extents */
			
 
				-	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
			
 
				-
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	if (nextents == 0) {
			
 
				-		*idxp = 0;
			
 
				-		return NULL;
			
 
				-	}
			
 
				-	low = 0;
			
 
				-	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-		/* Find target extent list */
			
 
				-		int	erp_idx = 0;
			
 
				-		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
			
 
				-		base = erp->er_extbuf;
			
 
				-		high = erp->er_extcount - 1;
			
 
				-	} else {
			
 
				-		base = ifp->if_u1.if_extents;
			
 
				-		high = nextents - 1;
			
 
				-	}
			
 
				-	/* Binary search extent records */
			
 
				-	while (low <= high) {
			
 
				-		idx = (low + high) >> 1;
			
 
				-		ep = base + idx;
			
 
				-		startoff = xfs_bmbt_get_startoff(ep);
			
 
				-		blockcount = xfs_bmbt_get_blockcount(ep);
			
 
				-		if (bno < startoff) {
			
 
				-			high = idx - 1;
			
 
				-		} else if (bno >= startoff + blockcount) {
			
 
				-			low = idx + 1;
			
 
				-		} else {
			
 
				-			/* Convert back to file-based extent index */
			
 
				-			if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-				idx += erp->er_extoff;
			
 
				-			}
			
 
				-			*idxp = idx;
			
 
				-			return ep;
			
 
				-		}
			
 
				-	}
			
 
				-	/* Convert back to file-based extent index */
			
 
				-	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				-		idx += erp->er_extoff;
			
 
				-	}
			
 
				-	if (bno >= startoff + blockcount) {
			
 
				-		if (++idx == nextents) {
			
 
				-			ep = NULL;
			
 
				-		} else {
			
 
				-			ep = xfs_iext_get_ext(ifp, idx);
			
 
				-		}
			
 
				-	}
			
 
				-	*idxp = idx;
			
 
				-	return ep;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Return a pointer to the indirection array entry containing the
			
 
				- * extent record for filesystem block bno. Store the index of the
			
 
				- * target irec in *erp_idxp.
			
 
				- */
			
 
				-xfs_ext_irec_t *			/* pointer to found extent record */
			
 
				-xfs_iext_bno_to_irec(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_fileoff_t	bno,		/* block number to search for */
			
 
				-	int		*erp_idxp)	/* irec index of target ext list */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
			
 
				-	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
			
 
				-	int		erp_idx;	/* indirection array index */
			
 
				-	int		nlists;		/* number of extent irec's (lists) */
			
 
				-	int		high;		/* binary search upper limit */
			
 
				-	int		low;		/* binary search lower limit */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	erp_idx = 0;
			
 
				-	low = 0;
			
 
				-	high = nlists - 1;
			
 
				-	while (low <= high) {
			
 
				-		erp_idx = (low + high) >> 1;
			
 
				-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
			
 
				-		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			
 
				-			high = erp_idx - 1;
			
 
				-		} else if (erp_next && bno >=
			
 
				-			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			
 
				-			low = erp_idx + 1;
			
 
				-		} else {
			
 
				-			break;
			
 
				-		}
			
 
				-	}
			
 
				-	*erp_idxp = erp_idx;
			
 
				-	return erp;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Return a pointer to the indirection array entry containing the
			
 
				- * extent record at file extent index *idxp. Store the index of the
			
 
				- * target irec in *erp_idxp and store the page index of the target
			
 
				- * extent record in *idxp.
			
 
				- */
			
 
				-xfs_ext_irec_t *
			
 
				-xfs_iext_idx_to_irec(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
			
 
				-	int		*erp_idxp,	/* pointer to target irec */
			
 
				-	int		realloc)	/* new bytes were just added */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
			
 
				-	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
			
 
				-	int		erp_idx;	/* indirection array index */
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-	int		high;		/* binary search upper limit */
			
 
				-	int		low;		/* binary search lower limit */
			
 
				-	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	ASSERT(page_idx >= 0);
			
 
				-	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
			
 
				-	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
			
 
				-
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	erp_idx = 0;
			
 
				-	low = 0;
			
 
				-	high = nlists - 1;
			
 
				-
			
 
				-	/* Binary search extent irec's */
			
 
				-	while (low <= high) {
			
 
				-		erp_idx = (low + high) >> 1;
			
 
				-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-		prev = erp_idx > 0 ? erp - 1 : NULL;
			
 
				-		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
			
 
				-		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			
 
				-			high = erp_idx - 1;
			
 
				-		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			
 
				-			   (page_idx == erp->er_extoff + erp->er_extcount &&
			
 
				-			    !realloc)) {
			
 
				-			low = erp_idx + 1;
			
 
				-		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			
 
				-			   erp->er_extcount == XFS_LINEAR_EXTS) {
			
 
				-			ASSERT(realloc);
			
 
				-			page_idx = 0;
			
 
				-			erp_idx++;
			
 
				-			erp = erp_idx < nlists ? erp + 1 : NULL;
			
 
				-			break;
			
 
				-		} else {
			
 
				-			page_idx -= erp->er_extoff;
			
 
				-			break;
			
 
				-		}
			
 
				-	}
			
 
				-	*idxp = page_idx;
			
 
				-	*erp_idxp = erp_idx;
			
 
				-	return(erp);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Allocate and initialize an indirection array once the space needed
			
 
				- * for incore extents increases above XFS_IEXT_BUFSZ.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_irec_init(
			
 
				-	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-
			
 
				-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-	ASSERT(nextents <= XFS_LINEAR_EXTS);
			
 
				-
			
 
				-	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
			
 
				-
			
 
				-	if (nextents == 0) {
			
 
				-		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
			
 
				-	} else if (!ifp->if_real_bytes) {
			
 
				-		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
			
 
				-	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
			
 
				-		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
			
 
				-	}
			
 
				-	erp->er_extbuf = ifp->if_u1.if_extents;
			
 
				-	erp->er_extcount = nextents;
			
 
				-	erp->er_extoff = 0;
			
 
				-
			
 
				-	ifp->if_flags |= XFS_IFEXTIREC;
			
 
				-	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
			
 
				-	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
			
 
				-	ifp->if_u1.if_ext_irec = erp;
			
 
				-
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Allocate and initialize a new entry in the indirection array.
			
 
				- */
			
 
				-xfs_ext_irec_t *
			
 
				-xfs_iext_irec_new(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		erp_idx)	/* index for new irec */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				-	int		i;		/* loop counter */
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-
			
 
				-	/* Resize indirection array */
			
 
				-	xfs_iext_realloc_indirect(ifp, ++nlists *
			
 
				-				  sizeof(xfs_ext_irec_t));
			
 
				-	/*
			
 
				-	 * Move records down in the array so the
			
 
				-	 * new page can use erp_idx.
			
 
				-	 */
			
 
				-	erp = ifp->if_u1.if_ext_irec;
			
 
				-	for (i = nlists - 1; i > erp_idx; i--) {
			
 
				-		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
			
 
				-	}
			
 
				-	ASSERT(i == erp_idx);
			
 
				-
			
 
				-	/* Initialize new extent record */
			
 
				-	erp = ifp->if_u1.if_ext_irec;
			
 
				-	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
			
 
				-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
			
 
				-	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
			
 
				-	erp[erp_idx].er_extcount = 0;
			
 
				-	erp[erp_idx].er_extoff = erp_idx > 0 ?
			
 
				-		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
			
 
				-	return (&erp[erp_idx]);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Remove a record from the indirection array.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_irec_remove(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		erp_idx)	/* irec index to remove */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				-	int		i;		/* loop counter */
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-	if (erp->er_extbuf) {
			
 
				-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			
 
				-			-erp->er_extcount);
			
 
				-		kmem_free(erp->er_extbuf);
			
 
				-	}
			
 
				-	/* Compact extent records */
			
 
				-	erp = ifp->if_u1.if_ext_irec;
			
 
				-	for (i = erp_idx; i < nlists - 1; i++) {
			
 
				-		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
			
 
				-	}
			
 
				-	/*
			
 
				-	 * Manually free the last extent record from the indirection
			
 
				-	 * array.  A call to xfs_iext_realloc_indirect() with a size
			
 
				-	 * of zero would result in a call to xfs_iext_destroy() which
			
 
				-	 * would in turn call this function again, creating a nasty
			
 
				-	 * infinite loop.
			
 
				-	 */
			
 
				-	if (--nlists) {
			
 
				-		xfs_iext_realloc_indirect(ifp,
			
 
				-			nlists * sizeof(xfs_ext_irec_t));
			
 
				-	} else {
			
 
				-		kmem_free(ifp->if_u1.if_ext_irec);
			
 
				-	}
			
 
				-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called to clean up large amounts of unused memory allocated
			
 
				- * by the indirection array.  Before compacting anything though, verify
			
 
				- * that the indirection array is still needed and switch back to the
			
 
				- * linear extent list (or even the inline buffer) if possible.  The
			
 
				- * compaction policy is as follows:
			
 
				- *
			
 
				- *    Full Compaction: Extents fit into a single page (or inline buffer)
			
 
				- * Partial Compaction: Extents occupy less than 50% of allocated space
			
 
				- *      No Compaction: Extents occupy at least 50% of allocated space
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_irec_compact(
			
 
				-	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				-{
			
 
				-	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				-
			
 
				-	if (nextents == 0) {
			
 
				-		xfs_iext_destroy(ifp);
			
 
				-	} else if (nextents <= XFS_INLINE_EXTS) {
			
 
				-		xfs_iext_indirect_to_direct(ifp);
			
 
				-		xfs_iext_direct_to_inline(ifp, nextents);
			
 
				-	} else if (nextents <= XFS_LINEAR_EXTS) {
			
 
				-		xfs_iext_indirect_to_direct(ifp);
			
 
				-	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
			
 
				-		xfs_iext_irec_compact_pages(ifp);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Combine extents from neighboring extent pages.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_irec_compact_pages(
			
 
				-	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				-{
			
 
				-	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
			
 
				-	int		erp_idx = 0;	/* indirection array index */
			
 
				-	int		nlists;		/* number of irec's (ex lists) */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	while (erp_idx < nlists - 1) {
			
 
				-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				-		erp_next = erp + 1;
			
 
				-		if (erp_next->er_extcount <=
			
 
				-		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			
 
				-			memcpy(&erp->er_extbuf[erp->er_extcount],
			
 
				-				erp_next->er_extbuf, erp_next->er_extcount *
			
 
				-				sizeof(xfs_bmbt_rec_t));
			
 
				-			erp->er_extcount += erp_next->er_extcount;
			
 
				-			/*
			
 
				-			 * Free page before removing extent record
			
 
				-			 * so er_extoffs don't get modified in
			
 
				-			 * xfs_iext_irec_remove.
			
 
				-			 */
			
 
				-			kmem_free(erp_next->er_extbuf);
			
 
				-			erp_next->er_extbuf = NULL;
			
 
				-			xfs_iext_irec_remove(ifp, erp_idx + 1);
			
 
				-			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-		} else {
			
 
				-			erp_idx++;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * This is called to update the er_extoff field in the indirection
			
 
				- * array when extents have been added or removed from one of the
			
 
				- * extent lists. erp_idx contains the irec index to begin updating
			
 
				- * at and ext_diff contains the number of extents that were added
			
 
				- * or removed.
			
 
				- */
			
 
				-void
			
 
				-xfs_iext_irec_update_extoffs(
			
 
				-	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				-	int		erp_idx,	/* irec index to update */
			
 
				-	int		ext_diff)	/* number of new extents */
			
 
				-{
			
 
				-	int		i;		/* loop counter */
			
 
				-	int		nlists;		/* number of irec's (ex lists */
			
 
				-
			
 
				-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				-	for (i = erp_idx; i < nlists; i++) {
			
 
				-		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Test whether it is appropriate to check an inode for and free post EOF
			
 
				- * blocks. The 'force' parameter determines whether we should also consider
			
 
				- * regular files that are marked preallocated or append-only.
			
 
				- */
			
 
				-bool
			
 
				-xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
			
 
				-{
			
 
				-	/* prealloc/delalloc exists only on regular files */
			
 
				-	if (!S_ISREG(ip->i_d.di_mode))
			
 
				-		return false;
			
 
				-
			
 
				-	/*
			
 
				-	 * Zero sized files with no cached pages and delalloc blocks will not
			
 
				-	 * have speculative prealloc/delalloc blocks to remove.
			
 
				-	 */
			
 
				-	if (VFS_I(ip)->i_size == 0 &&
			
 
				-	    VN_CACHED(VFS_I(ip)) == 0 &&
			
 
				-	    ip->i_delayed_blks == 0)
			
 
				-		return false;
			
 
				-
			
 
				-	/* If we haven't read in the extent list, then don't do it now. */
			
 
				-	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
			
 
				-		return false;
			
 
				-
			
 
				-	/*
			
 
				-	 * Do not free real preallocated or append-only files unless the file
			
 
				-	 * has delalloc blocks and we are forced to remove them.
			
 
				-	 */
			
 
				-	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
			
 
				-		if (!force || ip->i_delayed_blks == 0)
			
 
				-			return false;
			
 
				-
			
 
				-	return true;
			
 
				-}
			
 
				-
			
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,225 +18,15 @@
 
				 #ifndef	__XFS_INODE_H__
			
 
				 #define	__XFS_INODE_H__
			
 
				 
			
 
				-struct posix_acl;
			
 
				-struct xfs_dinode;
			
 
				-struct xfs_inode;
			
 
				-
			
 
				-/*
			
 
				- * Fork identifiers.
			
 
				- */
			
 
				-#define	XFS_DATA_FORK	0
			
 
				-#define	XFS_ATTR_FORK	1
			
 
				-
			
 
				-/*
			
 
				- * The following xfs_ext_irec_t struct introduces a second (top) level
			
 
				- * to the in-core extent allocation scheme. These structs are allocated
			
 
				- * in a contiguous block, creating an indirection array where each entry
			
 
				- * (irec) contains a pointer to a buffer of in-core extent records which
			
 
				- * it manages. Each extent buffer is 4k in size, since 4k is the system
			
 
				- * page size on Linux i386 and systems with larger page sizes don't seem
			
 
				- * to gain much, if anything, by using their native page size as the
			
 
				- * extent buffer size. Also, using 4k extent buffers everywhere provides
			
 
				- * a consistent interface for CXFS across different platforms.
			
 
				- *
			
 
				- * There is currently no limit on the number of irec's (extent lists)
			
 
				- * allowed, so heavily fragmented files may require an indirection array
			
 
				- * which spans multiple system pages of memory. The number of extents
			
 
				- * which would require this amount of contiguous memory is very large
			
 
				- * and should not cause problems in the foreseeable future. However,
			
 
				- * if the memory needed for the contiguous array ever becomes a problem,
			
 
				- * it is possible that a third level of indirection may be required.
			
 
				- */
			
 
				-typedef struct xfs_ext_irec {
			
 
				-	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
			
 
				-	xfs_extnum_t	er_extoff;	/* extent offset in file */
			
 
				-	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
			
 
				-} xfs_ext_irec_t;
			
 
				+#include "xfs_inode_buf.h"
			
 
				+#include "xfs_inode_fork.h"
			
 
				 
			
 
				 /*
			
 
				- * File incore extent information, present for each of data & attr forks.
			
 
				+ * Kernel only inode definitions
			
 
				  */
			
 
				-#define	XFS_IEXT_BUFSZ		4096
			
 
				-#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
			
 
				-#define	XFS_INLINE_EXTS		2
			
 
				-#define	XFS_INLINE_DATA		32
			
 
				-typedef struct xfs_ifork {
			
 
				-	int			if_bytes;	/* bytes in if_u1 */
			
 
				-	int			if_real_bytes;	/* bytes allocated in if_u1 */
			
 
				-	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
			
 
				-	short			if_broot_bytes;	/* bytes allocated for root */
			
 
				-	unsigned char		if_flags;	/* per-fork flags */
			
 
				-	union {
			
 
				-		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
			
 
				-		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
			
 
				-		char		*if_data;	/* inline file data */
			
 
				-	} if_u1;
			
 
				-	union {
			
 
				-		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
			
 
				-						/* very small file extents */
			
 
				-		char		if_inline_data[XFS_INLINE_DATA];
			
 
				-						/* very small file data */
			
 
				-		xfs_dev_t	if_rdev;	/* dev number if special */
			
 
				-		uuid_t		if_uuid;	/* mount point value */
			
 
				-	} if_u2;
			
 
				-} xfs_ifork_t;
			
 
				-
			
 
				-/*
			
 
				- * Inode location information.  Stored in the inode and passed to
			
 
				- * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
			
 
				- */
			
 
				-struct xfs_imap {
			
 
				-	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
			
 
				-	ushort		im_len;		/* length in BBs of inode chunk */
			
 
				-	ushort		im_boffset;	/* inode offset in block in bytes */
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * This is the xfs in-core inode structure.
			
 
				- * Most of the on-disk inode is embedded in the i_d field.
			
 
				- *
			
 
				- * The extent pointers/inline file space, however, are managed
			
 
				- * separately.  The memory for this information is pointed to by
			
 
				- * the if_u1 unions depending on the type of the data.
			
 
				- * This is used to linearize the array of extents for fast in-core
			
 
				- * access.  This is used until the file's number of extents
			
 
				- * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
			
 
				- * are accessed through the buffer cache.
			
 
				- *
			
 
				- * Other state kept in the in-core inode is used for identification,
			
 
				- * locking, transactional updating, etc of the inode.
			
 
				- *
			
 
				- * Generally, we do not want to hold the i_rlock while holding the
			
 
				- * i_ilock. Hierarchy is i_iolock followed by i_rlock.
			
 
				- *
			
 
				- * xfs_iptr_t contains all the inode fields up to and including the
			
 
				- * i_mnext and i_mprev fields, it is used as a marker in the inode
			
 
				- * chain off the mount structure by xfs_sync calls.
			
 
				- */
			
 
				-
			
 
				-typedef struct xfs_ictimestamp {
			
 
				-	__int32_t	t_sec;		/* timestamp seconds */
			
 
				-	__int32_t	t_nsec;		/* timestamp nanoseconds */
			
 
				-} xfs_ictimestamp_t;
			
 
				-
			
 
				-/*
			
 
				- * NOTE:  This structure must be kept identical to struct xfs_dinode
			
 
				- * 	  in xfs_dinode.h except for the endianness annotations.
			
 
				- */
			
 
				-typedef struct xfs_icdinode {
			
 
				-	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
			
 
				-	__uint16_t	di_mode;	/* mode and type of file */
			
 
				-	__int8_t	di_version;	/* inode version */
			
 
				-	__int8_t	di_format;	/* format of di_c data */
			
 
				-	__uint16_t	di_onlink;	/* old number of links to file */
			
 
				-	__uint32_t	di_uid;		/* owner's user id */
			
 
				-	__uint32_t	di_gid;		/* owner's group id */
			
 
				-	__uint32_t	di_nlink;	/* number of links to file */
			
 
				-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
			
 
				-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
			
 
				-	__uint8_t	di_pad[6];	/* unused, zeroed space */
			
 
				-	__uint16_t	di_flushiter;	/* incremented on flush */
			
 
				-	xfs_ictimestamp_t di_atime;	/* time last accessed */
			
 
				-	xfs_ictimestamp_t di_mtime;	/* time last modified */
			
 
				-	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
			
 
				-	xfs_fsize_t	di_size;	/* number of bytes in file */
			
 
				-	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
			
 
				-	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
			
 
				-	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
			
 
				-	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
			
 
				-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
			
 
				-	__int8_t	di_aformat;	/* format of attr fork's data */
			
 
				-	__uint32_t	di_dmevmask;	/* DMIG event mask */
			
 
				-	__uint16_t	di_dmstate;	/* DMIG state info */
			
 
				-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
			
 
				-	__uint32_t	di_gen;		/* generation number */
			
 
				-
			
 
				-	/* di_next_unlinked is the only non-core field in the old dinode */
			
 
				-	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
			
 
				-
			
 
				-	/* start of the extended dinode, writable fields */
			
 
				-	__uint32_t	di_crc;		/* CRC of the inode */
			
 
				-	__uint64_t	di_changecount;	/* number of attribute changes */
			
 
				-	xfs_lsn_t	di_lsn;		/* flush sequence */
			
 
				-	__uint64_t	di_flags2;	/* more random flags */
			
 
				-	__uint8_t	di_pad2[16];	/* more padding for future expansion */
			
 
				-
			
 
				-	/* fields only written to during inode creation */
			
 
				-	xfs_ictimestamp_t di_crtime;	/* time created */
			
 
				-	xfs_ino_t	di_ino;		/* inode number */
			
 
				-	uuid_t		di_uuid;	/* UUID of the filesystem */
			
 
				-
			
 
				-	/* structure must be padded to 64 bit alignment */
			
 
				-} xfs_icdinode_t;
			
 
				-
			
 
				-static inline uint xfs_icdinode_size(int version)
			
 
				-{
			
 
				-	if (version == 3)
			
 
				-		return sizeof(struct xfs_icdinode);
			
 
				-	return offsetof(struct xfs_icdinode, di_next_unlinked);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Flags for xfs_ichgtime().
			
 
				- */
			
 
				-#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
			
 
				-#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
			
 
				-#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */
			
 
				-
			
 
				-/*
			
 
				- * Per-fork incore inode flags.
			
 
				- */
			
 
				-#define	XFS_IFINLINE	0x01	/* Inline data is read in */
			
 
				-#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
			
 
				-#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
			
 
				-#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
			
 
				-
			
 
				-/*
			
 
				- * Fork handling.
			
 
				- */
			
 
				-
			
 
				-#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
			
 
				-#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
			
 
				-
			
 
				-#define XFS_IFORK_PTR(ip,w)		\
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		&(ip)->i_df : \
			
 
				-		(ip)->i_afp)
			
 
				-#define XFS_IFORK_DSIZE(ip) \
			
 
				-	(XFS_IFORK_Q(ip) ? \
			
 
				-		XFS_IFORK_BOFF(ip) : \
			
 
				-		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
			
 
				-#define XFS_IFORK_ASIZE(ip) \
			
 
				-	(XFS_IFORK_Q(ip) ? \
			
 
				-		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
			
 
				-			XFS_IFORK_BOFF(ip) : \
			
 
				-		0)
			
 
				-#define XFS_IFORK_SIZE(ip,w) \
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		XFS_IFORK_DSIZE(ip) : \
			
 
				-		XFS_IFORK_ASIZE(ip))
			
 
				-#define XFS_IFORK_FORMAT(ip,w) \
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		(ip)->i_d.di_format : \
			
 
				-		(ip)->i_d.di_aformat)
			
 
				-#define XFS_IFORK_FMT_SET(ip,w,n) \
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		((ip)->i_d.di_format = (n)) : \
			
 
				-		((ip)->i_d.di_aformat = (n)))
			
 
				-#define XFS_IFORK_NEXTENTS(ip,w) \
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		(ip)->i_d.di_nextents : \
			
 
				-		(ip)->i_d.di_anextents)
			
 
				-#define XFS_IFORK_NEXT_SET(ip,w,n) \
			
 
				-	((w) == XFS_DATA_FORK ? \
			
 
				-		((ip)->i_d.di_nextents = (n)) : \
			
 
				-		((ip)->i_d.di_anextents = (n)))
			
 
				-#define XFS_IFORK_MAXEXT(ip, w) \
			
 
				-	(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
			
 
				-
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				 
			
 
				+struct xfs_dinode;
			
 
				+struct xfs_inode;
			
 
				 struct xfs_buf;
			
 
				 struct xfs_bmap_free;
			
 
				 struct xfs_bmbt_irec;
			
@@ -525,9 +315,21 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 
				 	 ((pip)->i_d.di_mode & S_ISGID))
			
 
				 
			
 
				 
			
 
				-/*
			
 
				- * xfs_inode.c prototypes.
			
 
				- */
			
 
				+int		xfs_release(struct xfs_inode *ip);
			
 
				+int		xfs_inactive(struct xfs_inode *ip);
			
 
				+int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
			
 
				+			   struct xfs_inode **ipp, struct xfs_name *ci_name);
			
 
				+int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
			
 
				+			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
			
 
				+int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
			
 
				+			   struct xfs_inode *ip);
			
 
				+int		xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
			
 
				+			 struct xfs_name *target_name);
			
 
				+int		xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
			
 
				+			   struct xfs_inode *src_ip, struct xfs_inode *target_dp,
			
 
				+			   struct xfs_name *target_name,
			
 
				+			   struct xfs_inode *target_ip);
			
 
				+
			
 
				 void		xfs_ilock(xfs_inode_t *, uint);
			
 
				 int		xfs_ilock_nowait(xfs_inode_t *, uint);
			
 
				 void		xfs_iunlock(xfs_inode_t *, uint);
			
@@ -548,13 +350,28 @@ int		xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
 
				 int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
			
 
				 
			
 
				 void		xfs_iext_realloc(xfs_inode_t *, int, int);
			
 
				+
			
 
				 void		xfs_iunpin_wait(xfs_inode_t *);
			
 
				+#define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
			
 
				+
			
 
				 int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
			
 
				 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
			
 
				 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
			
 
				 
			
 
				 xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
			
 
				 
			
 
				+int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
			
 
				+			       xfs_nlink_t, xfs_dev_t, prid_t, int,
			
 
				+			       struct xfs_inode **, int *);
			
 
				+int		xfs_droplink(struct xfs_trans *, struct xfs_inode *);
			
 
				+int		xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
			
 
				+void		xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *);
			
 
				+
			
 
				+/* from xfs_file.c */
			
 
				+int		xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
			
 
				+int		xfs_iozero(struct xfs_inode *, loff_t, size_t);
			
 
				+
			
 
				+
			
 
				 #define IHOLD(ip) \
			
 
				 do { \
			
 
				 	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
			
@@ -568,65 +385,6 @@ do { \
 
				 	iput(VFS_I(ip)); \
			
 
				 } while (0)
			
 
				 
			
 
				-#endif /* __KERNEL__ */
			
 
				-
			
 
				-/*
			
 
				- * Flags for xfs_iget()
			
 
				- */
			
 
				-#define XFS_IGET_CREATE		0x1
			
 
				-#define XFS_IGET_UNTRUSTED	0x2
			
 
				-#define XFS_IGET_DONTCACHE	0x4
			
 
				-
			
 
				-int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
			
 
				-			       struct xfs_imap *, struct xfs_dinode **,
			
 
				-			       struct xfs_buf **, uint, uint);
			
 
				-int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
			
 
				-			  struct xfs_inode *, uint);
			
 
				-void		xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
			
 
				-void		xfs_dinode_to_disk(struct xfs_dinode *,
			
 
				-				   struct xfs_icdinode *);
			
 
				-void		xfs_idestroy_fork(struct xfs_inode *, int);
			
 
				-void		xfs_idata_realloc(struct xfs_inode *, int, int);
			
 
				-void		xfs_iroot_realloc(struct xfs_inode *, int, int);
			
 
				-int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
			
 
				-int		xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
			
 
				-
			
 
				-xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
			
 
				-void		xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
			
 
				-				xfs_bmbt_irec_t *, int);
			
 
				-void		xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
			
 
				-void		xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
			
 
				-void		xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int);
			
 
				-void		xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
			
 
				-void		xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
			
 
				-void		xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
			
 
				-void		xfs_iext_realloc_direct(xfs_ifork_t *, int);
			
 
				-void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
			
 
				-void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
			
 
				-void		xfs_iext_destroy(xfs_ifork_t *);
			
 
				-xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *);
			
 
				-xfs_ext_irec_t	*xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
			
 
				-xfs_ext_irec_t	*xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
			
 
				-void		xfs_iext_irec_init(xfs_ifork_t *);
			
 
				-xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
			
 
				-void		xfs_iext_irec_remove(xfs_ifork_t *, int);
			
 
				-void		xfs_iext_irec_compact(xfs_ifork_t *);
			
 
				-void		xfs_iext_irec_compact_pages(xfs_ifork_t *);
			
 
				-void		xfs_iext_irec_compact_full(xfs_ifork_t *);
			
 
				-void		xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
			
 
				-bool		xfs_can_free_eofblocks(struct xfs_inode *, bool);
			
 
				-
			
 
				-#define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
			
 
				-
			
 
				-#if defined(DEBUG)
			
 
				-void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
			
 
				-#else
			
 
				-#define	xfs_inobp_check(mp, bp)
			
 
				-#endif /* DEBUG */
			
 
				-
			
 
				-extern struct kmem_zone	*xfs_ifork_zone;
			
 
				 extern struct kmem_zone	*xfs_inode_zone;
			
 
				-extern struct kmem_zone	*xfs_ili_zone;
			
 
				-extern const struct xfs_buf_ops xfs_inode_buf_ops;
			
 
				 
			
 
				 #endif	/* __XFS_INODE_H__ */
			
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -0,0 +1,483 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_icache.h"
			
 
				+#include "xfs_ialloc.h"
			
 
				+
			
 
				+/*
			
 
				+ * Check that none of the inodes in the buffer have a next
			
 
				+ * unlinked field of 0.
			
 
				+ */
			
 
				+#if defined(DEBUG)
			
 
				+void
			
 
				+xfs_inobp_check(
			
 
				+	xfs_mount_t	*mp,
			
 
				+	xfs_buf_t	*bp)
			
 
				+{
			
 
				+	int		i;
			
 
				+	int		j;
			
 
				+	xfs_dinode_t	*dip;
			
 
				+
			
 
				+	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
			
 
				+
			
 
				+	for (i = 0; i < j; i++) {
			
 
				+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
			
 
				+					i * mp->m_sb.sb_inodesize);
			
 
				+		if (!dip->di_next_unlinked)  {
			
 
				+			xfs_alert(mp,
			
 
				+	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
			
 
				+				bp);
			
 
				+			ASSERT(dip->di_next_unlinked);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * If we are doing readahead on an inode buffer, we might be in log recovery
			
 
				+ * reading an inode allocation buffer that hasn't yet been replayed, and hence
			
 
				+ * has not had the inode cores stamped into it. Hence for readahead, the buffer
			
 
				+ * may be potentially invalid.
			
 
				+ *
			
 
				+ * If the readahead buffer is invalid, we don't want to mark it with an error,
			
 
				+ * but we do want to clear the DONE status of the buffer so that a followup read
			
 
				+ * will re-read it from disk. This will ensure that we don't get unnecessary
			
 
				+ * warnings during log recovery and we don't get unnecessary panics on debug
			
 
				+ * kernels.
			
 
				+ */
			
 
				+static void
			
 
				+xfs_inode_buf_verify(
			
 
				+	struct xfs_buf	*bp,
			
 
				+	bool		readahead)
			
 
				+{
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	int		i;
			
 
				+	int		ni;
			
 
				+
			
 
				+	/*
			
 
				+	 * Validate the magic number and version of every inode in the buffer
			
 
				+	 */
			
 
				+	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
			
 
				+	for (i = 0; i < ni; i++) {
			
 
				+		int		di_ok;
			
 
				+		xfs_dinode_t	*dip;
			
 
				+
			
 
				+		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
			
 
				+					(i << mp->m_sb.sb_inodelog));
			
 
				+		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
			
 
				+			    XFS_DINODE_GOOD_VERSION(dip->di_version);
			
 
				+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
			
 
				+						XFS_ERRTAG_ITOBP_INOTOBP,
			
 
				+						XFS_RANDOM_ITOBP_INOTOBP))) {
			
 
				+			if (readahead) {
			
 
				+				bp->b_flags &= ~XBF_DONE;
			
 
				+				return;
			
 
				+			}
			
 
				+
			
 
				+			xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
			
 
				+					     mp, dip);
			
 
				+#ifdef DEBUG
			
 
				+			xfs_emerg(mp,
			
 
				+				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
			
 
				+				(unsigned long long)bp->b_bn, i,
			
 
				+				be16_to_cpu(dip->di_magic));
			
 
				+			ASSERT(0);
			
 
				+#endif
			
 
				+		}
			
 
				+	}
			
 
				+	xfs_inobp_check(mp, bp);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static void
			
 
				+xfs_inode_buf_read_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	xfs_inode_buf_verify(bp, false);
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_inode_buf_readahead_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	xfs_inode_buf_verify(bp, true);
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_inode_buf_write_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	xfs_inode_buf_verify(bp, false);
			
 
				+}
			
 
				+
			
 
				+const struct xfs_buf_ops xfs_inode_buf_ops = {
			
 
				+	.verify_read = xfs_inode_buf_read_verify,
			
 
				+	.verify_write = xfs_inode_buf_write_verify,
			
 
				+};
			
 
				+
			
 
				+const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
			
 
				+	.verify_read = xfs_inode_buf_readahead_verify,
			
 
				+	.verify_write = xfs_inode_buf_write_verify,
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * This routine is called to map an inode to the buffer containing the on-disk
			
 
				+ * version of the inode.  It returns a pointer to the buffer containing the
			
 
				+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
			
 
				+ * pointer to the on-disk inode within that buffer.
			
 
				+ *
			
 
				+ * If a non-zero error is returned, then the contents of bpp and dipp are
			
 
				+ * undefined.
			
 
				+ */
			
 
				+int
			
 
				+xfs_imap_to_bp(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_trans	*tp,
			
 
				+	struct xfs_imap		*imap,
			
 
				+	struct xfs_dinode       **dipp,
			
 
				+	struct xfs_buf		**bpp,
			
 
				+	uint			buf_flags,
			
 
				+	uint			iget_flags)
			
 
				+{
			
 
				+	struct xfs_buf		*bp;
			
 
				+	int			error;
			
 
				+
			
 
				+	buf_flags |= XBF_UNMAPPED;
			
 
				+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
			
 
				+				   (int)imap->im_len, buf_flags, &bp,
			
 
				+				   &xfs_inode_buf_ops);
			
 
				+	if (error) {
			
 
				+		if (error == EAGAIN) {
			
 
				+			ASSERT(buf_flags & XBF_TRYLOCK);
			
 
				+			return error;
			
 
				+		}
			
 
				+
			
 
				+		if (error == EFSCORRUPTED &&
			
 
				+		    (iget_flags & XFS_IGET_UNTRUSTED))
			
 
				+			return XFS_ERROR(EINVAL);
			
 
				+
			
 
				+		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
			
 
				+			__func__, error);
			
 
				+		return error;
			
 
				+	}
			
 
				+
			
 
				+	*bpp = bp;
			
 
				+	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xfs_dinode_from_disk(
			
 
				+	xfs_icdinode_t		*to,
			
 
				+	xfs_dinode_t		*from)
			
 
				+{
			
 
				+	to->di_magic = be16_to_cpu(from->di_magic);
			
 
				+	to->di_mode = be16_to_cpu(from->di_mode);
			
 
				+	to->di_version = from ->di_version;
			
 
				+	to->di_format = from->di_format;
			
 
				+	to->di_onlink = be16_to_cpu(from->di_onlink);
			
 
				+	to->di_uid = be32_to_cpu(from->di_uid);
			
 
				+	to->di_gid = be32_to_cpu(from->di_gid);
			
 
				+	to->di_nlink = be32_to_cpu(from->di_nlink);
			
 
				+	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
			
 
				+	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
			
 
				+	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
			
 
				+	to->di_flushiter = be16_to_cpu(from->di_flushiter);
			
 
				+	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
			
 
				+	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
			
 
				+	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
			
 
				+	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
			
 
				+	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
			
 
				+	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
			
 
				+	to->di_size = be64_to_cpu(from->di_size);
			
 
				+	to->di_nblocks = be64_to_cpu(from->di_nblocks);
			
 
				+	to->di_extsize = be32_to_cpu(from->di_extsize);
			
 
				+	to->di_nextents = be32_to_cpu(from->di_nextents);
			
 
				+	to->di_anextents = be16_to_cpu(from->di_anextents);
			
 
				+	to->di_forkoff = from->di_forkoff;
			
 
				+	to->di_aformat	= from->di_aformat;
			
 
				+	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
			
 
				+	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
			
 
				+	to->di_flags	= be16_to_cpu(from->di_flags);
			
 
				+	to->di_gen	= be32_to_cpu(from->di_gen);
			
 
				+
			
 
				+	if (to->di_version == 3) {
			
 
				+		to->di_changecount = be64_to_cpu(from->di_changecount);
			
 
				+		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
			
 
				+		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
			
 
				+		to->di_flags2 = be64_to_cpu(from->di_flags2);
			
 
				+		to->di_ino = be64_to_cpu(from->di_ino);
			
 
				+		to->di_lsn = be64_to_cpu(from->di_lsn);
			
 
				+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				+		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void
			
 
				+xfs_dinode_to_disk(
			
 
				+	xfs_dinode_t		*to,
			
 
				+	xfs_icdinode_t		*from)
			
 
				+{
			
 
				+	to->di_magic = cpu_to_be16(from->di_magic);
			
 
				+	to->di_mode = cpu_to_be16(from->di_mode);
			
 
				+	to->di_version = from ->di_version;
			
 
				+	to->di_format = from->di_format;
			
 
				+	to->di_onlink = cpu_to_be16(from->di_onlink);
			
 
				+	to->di_uid = cpu_to_be32(from->di_uid);
			
 
				+	to->di_gid = cpu_to_be32(from->di_gid);
			
 
				+	to->di_nlink = cpu_to_be32(from->di_nlink);
			
 
				+	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
			
 
				+	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
			
 
				+	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
			
 
				+	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
			
 
				+	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
			
 
				+	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
			
 
				+	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
			
 
				+	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
			
 
				+	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
			
 
				+	to->di_size = cpu_to_be64(from->di_size);
			
 
				+	to->di_nblocks = cpu_to_be64(from->di_nblocks);
			
 
				+	to->di_extsize = cpu_to_be32(from->di_extsize);
			
 
				+	to->di_nextents = cpu_to_be32(from->di_nextents);
			
 
				+	to->di_anextents = cpu_to_be16(from->di_anextents);
			
 
				+	to->di_forkoff = from->di_forkoff;
			
 
				+	to->di_aformat = from->di_aformat;
			
 
				+	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
			
 
				+	to->di_dmstate = cpu_to_be16(from->di_dmstate);
			
 
				+	to->di_flags = cpu_to_be16(from->di_flags);
			
 
				+	to->di_gen = cpu_to_be32(from->di_gen);
			
 
				+
			
 
				+	if (from->di_version == 3) {
			
 
				+		to->di_changecount = cpu_to_be64(from->di_changecount);
			
 
				+		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
			
 
				+		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
			
 
				+		to->di_flags2 = cpu_to_be64(from->di_flags2);
			
 
				+		to->di_ino = cpu_to_be64(from->di_ino);
			
 
				+		to->di_lsn = cpu_to_be64(from->di_lsn);
			
 
				+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				+		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				+		to->di_flushiter = 0;
			
 
				+	} else {
			
 
				+		to->di_flushiter = cpu_to_be16(from->di_flushiter);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static bool
			
 
				+xfs_dinode_verify(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_inode	*ip,
			
 
				+	struct xfs_dinode	*dip)
			
 
				+{
			
 
				+	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
			
 
				+		return false;
			
 
				+
			
 
				+	/* only version 3 or greater inodes are extensively verified here */
			
 
				+	if (dip->di_version < 3)
			
 
				+		return true;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return false;
			
 
				+	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				+			      offsetof(struct xfs_dinode, di_crc)))
			
 
				+		return false;
			
 
				+	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
			
 
				+		return false;
			
 
				+	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
			
 
				+		return false;
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+void
			
 
				+xfs_dinode_calc_crc(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dinode	*dip)
			
 
				+{
			
 
				+	__uint32_t		crc;
			
 
				+
			
 
				+	if (dip->di_version < 3)
			
 
				+		return;
			
 
				+
			
 
				+	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
			
 
				+	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				+			      offsetof(struct xfs_dinode, di_crc));
			
 
				+	dip->di_crc = xfs_end_cksum(crc);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Read the disk inode attributes into the in-core inode structure.
			
 
				+ *
			
 
				+ * For version 5 superblocks, if we are initialising a new inode and we are not
			
 
				+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
			
 
				+ * inode core with a random generation number. If we are keeping inodes around,
			
 
				+ * we need to read the inode cluster to get the existing generation number off
			
 
				+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
			
 
				+ * format) then log recovery is dependent on the di_flushiter field being
			
 
				+ * initialised from the current on-disk value and hence we must also read the
			
 
				+ * inode off disk.
			
 
				+ */
			
 
				+int
			
 
				+xfs_iread(
			
 
				+	xfs_mount_t	*mp,
			
 
				+	xfs_trans_t	*tp,
			
 
				+	xfs_inode_t	*ip,
			
 
				+	uint		iget_flags)
			
 
				+{
			
 
				+	xfs_buf_t	*bp;
			
 
				+	xfs_dinode_t	*dip;
			
 
				+	int		error;
			
 
				+
			
 
				+	/*
			
 
				+	 * Fill in the location information in the in-core inode.
			
 
				+	 */
			
 
				+	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	/* shortcut IO on inode allocation if possible */
			
 
				+	if ((iget_flags & XFS_IGET_CREATE) &&
			
 
				+	    xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
			
 
				+		/* initialise the on-disk inode core */
			
 
				+		memset(&ip->i_d, 0, sizeof(ip->i_d));
			
 
				+		ip->i_d.di_magic = XFS_DINODE_MAGIC;
			
 
				+		ip->i_d.di_gen = prandom_u32();
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+			ip->i_d.di_version = 3;
			
 
				+			ip->i_d.di_ino = ip->i_ino;
			
 
				+			uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
			
 
				+		} else
			
 
				+			ip->i_d.di_version = 2;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Get pointers to the on-disk inode and the buffer containing it.
			
 
				+	 */
			
 
				+	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	/* even unallocated inodes are verified */
			
 
				+	if (!xfs_dinode_verify(mp, ip, dip)) {
			
 
				+		xfs_alert(mp, "%s: validation failed for inode %lld failed",
			
 
				+				__func__, ip->i_ino);
			
 
				+
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
			
 
				+		error = XFS_ERROR(EFSCORRUPTED);
			
 
				+		goto out_brelse;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If the on-disk inode is already linked to a directory
			
 
				+	 * entry, copy all of the inode into the in-core inode.
			
 
				+	 * xfs_iformat_fork() handles copying in the inode format
			
 
				+	 * specific information.
			
 
				+	 * Otherwise, just get the truly permanent information.
			
 
				+	 */
			
 
				+	if (dip->di_mode) {
			
 
				+		xfs_dinode_from_disk(&ip->i_d, dip);
			
 
				+		error = xfs_iformat_fork(ip, dip);
			
 
				+		if (error)  {
			
 
				+#ifdef DEBUG
			
 
				+			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
			
 
				+				__func__, error);
			
 
				+#endif /* DEBUG */
			
 
				+			goto out_brelse;
			
 
				+		}
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Partial initialisation of the in-core inode. Just the bits
			
 
				+		 * that xfs_ialloc won't overwrite or relies on being correct.
			
 
				+		 */
			
 
				+		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
			
 
				+		ip->i_d.di_version = dip->di_version;
			
 
				+		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
			
 
				+		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
			
 
				+
			
 
				+		if (dip->di_version == 3) {
			
 
				+			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
			
 
				+			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Make sure to pull in the mode here as well in
			
 
				+		 * case the inode is released without being used.
			
 
				+		 * This ensures that xfs_inactive() will see that
			
 
				+		 * the inode is already free and not try to mess
			
 
				+		 * with the uninitialized part of it.
			
 
				+		 */
			
 
				+		ip->i_d.di_mode = 0;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * The inode format changed when we moved the link count and
			
 
				+	 * made it 32 bits long.  If this is an old format inode,
			
 
				+	 * convert it in memory to look like a new one.  If it gets
			
 
				+	 * flushed to disk we will convert back before flushing or
			
 
				+	 * logging it.  We zero out the new projid field and the old link
			
 
				+	 * count field.  We'll handle clearing the pad field (the remains
			
 
				+	 * of the old uuid field) when we actually convert the inode to
			
 
				+	 * the new format. We don't change the version number so that we
			
 
				+	 * can distinguish this from a real new format inode.
			
 
				+	 */
			
 
				+	if (ip->i_d.di_version == 1) {
			
 
				+		ip->i_d.di_nlink = ip->i_d.di_onlink;
			
 
				+		ip->i_d.di_onlink = 0;
			
 
				+		xfs_set_projid(ip, 0);
			
 
				+	}
			
 
				+
			
 
				+	ip->i_delayed_blks = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Mark the buffer containing the inode as something to keep
			
 
				+	 * around for a while.  This helps to keep recently accessed
			
 
				+	 * meta-data in-core longer.
			
 
				+	 */
			
 
				+	xfs_buf_set_ref(bp, XFS_INO_REF);
			
 
				+
			
 
				+	/*
			
 
				+	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
			
 
				+	 * inode, because it was acquired with xfs_trans_read_buf() in
			
 
				+	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
			
 
				+	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
			
 
				+	 * will only release the buffer if it is not dirty within the
			
 
				+	 * transaction.  It will be OK to release the buffer in this case,
			
 
				+	 * because inodes on disk are never destroyed and we will be locking the
			
 
				+	 * new in-core inode before putting it in the cache where other
			
 
				+	 * processes can find it.  Thus we don't have to worry about the inode
			
 
				+	 * being changed just because we released the buffer.
			
 
				+	 */
			
 
				+ out_brelse:
			
 
				+	xfs_trans_brelse(tp, bp);
			
 
				+	return error;
			
 
				+}
			
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/xfs_inode_buf.h
@@ -0,0 +1,53 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef	__XFS_INODE_BUF_H__
			
 
				+#define	__XFS_INODE_BUF_H__
			
 
				+
			
 
				+struct xfs_inode;
			
 
				+struct xfs_dinode;
			
 
				+struct xfs_icdinode;
			
 
				+
			
 
				+/*
			
 
				+ * Inode location information.  Stored in the inode and passed to
			
 
				+ * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
			
 
				+ */
			
 
				+struct xfs_imap {
			
 
				+	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
			
 
				+	ushort		im_len;		/* length in BBs of inode chunk */
			
 
				+	ushort		im_boffset;	/* inode offset in block in bytes */
			
 
				+};
			
 
				+
			
 
				+int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
			
 
				+			       struct xfs_imap *, struct xfs_dinode **,
			
 
				+			       struct xfs_buf **, uint, uint);
			
 
				+int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
			
 
				+			  struct xfs_inode *, uint);
			
 
				+void		xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
			
 
				+void		xfs_dinode_to_disk(struct xfs_dinode *,
			
 
				+				   struct xfs_icdinode *);
			
 
				+
			
 
				+#if defined(DEBUG)
			
 
				+void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
			
 
				+#else
			
 
				+#define	xfs_inobp_check(mp, bp)
			
 
				+#endif /* DEBUG */
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_inode_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
			
 
				+
			
 
				+#endif	/* __XFS_INODE_BUF_H__ */
			
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/xfs_inode_fork.c
@@ -0,0 +1,1920 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include <linux/log2.h>
			
 
				+
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_inum.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_alloc_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_attr_sf.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+#include "xfs_inode_item.h"
			
 
				+#include "xfs_btree.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_ialloc.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_quota.h"
			
 
				+#include "xfs_filestream.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_icache.h"
			
 
				+
			
 
				+/*
				+ * Zone backing the separately allocated attribute fork (ip->i_afp);
				+ * see the kmem_zone_zalloc()/kmem_zone_free() calls in this file.
				+ */
				+kmem_zone_t *xfs_ifork_zone;
				+
				+/* Per-format helpers used by xfs_iformat_fork(). */
				+STATIC int xfs_iformat_local(xfs_inode_t *ip, xfs_dinode_t *dip,
				+			     int whichfork, int size);
				+STATIC int xfs_iformat_extents(xfs_inode_t *ip, xfs_dinode_t *dip,
				+			       int whichfork);
				+STATIC int xfs_iformat_btree(xfs_inode_t *ip, xfs_dinode_t *dip,
				+			     int whichfork);
			
 
				+
			
 
				+#ifdef DEBUG
				+/*
				+ * Debug-only walk over the first nrecs in-core extent records of
				+ * the given fork.  Each record is read with unaligned accessors and
				+ * unpacked; when fmt is XFS_EXTFMT_NOSTATE every record is asserted
				+ * to be in the XFS_EXT_NORM state.
				+ */
				+void
				+xfs_validate_extents(
				+	xfs_ifork_t		*ifp,
				+	int			nrecs,
				+	xfs_exntfmt_t		fmt)
				+{
				+	int			idx;
				+
				+	for (idx = 0; idx < nrecs; idx++) {
				+		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);
				+		xfs_bmbt_rec_host_t	copy;
				+		xfs_bmbt_irec_t		unpacked;
				+
				+		copy.l0 = get_unaligned(&src->l0);
				+		copy.l1 = get_unaligned(&src->l1);
				+		xfs_bmbt_get_all(&copy, &unpacked);
				+		if (fmt != XFS_EXTFMT_NOSTATE)
				+			continue;
				+		ASSERT(unpacked.br_state == XFS_EXT_NORM);
				+	}
				+}
				+#else /* DEBUG */
				+/* Non-debug builds compile the validation away entirely. */
				+#define xfs_validate_extents(ifp, nrecs, fmt)
				+#endif /* DEBUG */
			
 
				+
			
 
				+
			
 
				+/*
				+ * Move inode type and inode format specific information from the
				+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
				+ * this means set if_rdev to the proper value.  For files, directories,
				+ * and symlinks this means to bring in the in-line data or extent
				+ * pointers.  For a file in B-tree format, only the root is immediately
				+ * brought in-core.  The rest will be in-lined in if_extents when it
				+ * is first referenced (see xfs_iread_extents()).
				+ *
				+ * ip -- the in-core inode being populated
				+ * dip -- the on-disk inode it is populated from
				+ *
				+ * Returns 0 on success, or the error produced by XFS_ERROR(EFSCORRUPTED)
				+ * or by one of the xfs_iformat_*() helpers.  If setting up the attribute
				+ * fork fails, ip->i_afp is released and the already initialised data
				+ * fork is torn down before returning.
				+ */
				+int
				+xfs_iformat_fork(
				+	xfs_inode_t		*ip,
				+	xfs_dinode_t		*dip)
				+{
				+	xfs_attr_shortform_t	*atp;
				+	int			size;
				+	int			error = 0;
				+	xfs_fsize_t		di_size;
				+
				+	/* The extent counts of both forks can never exceed the block count. */
				+	if (unlikely(be32_to_cpu(dip->di_nextents) +
				+		     be16_to_cpu(dip->di_anextents) >
				+		     be64_to_cpu(dip->di_nblocks))) {
				+		xfs_warn(ip->i_mount,
				+			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
				+			(unsigned long long)ip->i_ino,
				+			(int)(be32_to_cpu(dip->di_nextents) +
				+			      be16_to_cpu(dip->di_anextents)),
				+			(unsigned long long)
				+				be64_to_cpu(dip->di_nblocks));
				+		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
				+				     ip->i_mount, dip);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
				+		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
				+			(unsigned long long)ip->i_ino,
				+			dip->di_forkoff);
				+		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
				+				     ip->i_mount, dip);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	/* A realtime inode is only valid when a realtime device is present. */
				+	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
				+		     !ip->i_mount->m_rtdev_targp)) {
				+		xfs_warn(ip->i_mount,
				+			"corrupt dinode %Lu, has realtime flag set.",
				+			(unsigned long long)ip->i_ino);
				+		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
				+				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	/* Set up the data fork according to the file type. */
				+	switch (ip->i_d.di_mode & S_IFMT) {
				+	case S_IFIFO:
				+	case S_IFCHR:
				+	case S_IFBLK:
				+	case S_IFSOCK:
				+		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
				+			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
				+					      ip->i_mount, dip);
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+		ip->i_d.di_size = 0;
				+		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
				+		break;
				+
				+	case S_IFREG:
				+	case S_IFLNK:
				+	case S_IFDIR:
				+		switch (dip->di_format) {
				+		case XFS_DINODE_FMT_LOCAL:
				+			/*
				+			 * no local regular files yet
				+			 */
				+			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
				+				xfs_warn(ip->i_mount,
				+			"corrupt inode %Lu (local format for regular file).",
				+					(unsigned long long) ip->i_ino);
				+				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
				+						     XFS_ERRLEVEL_LOW,
				+						     ip->i_mount, dip);
				+				return XFS_ERROR(EFSCORRUPTED);
				+			}
				+
				+			di_size = be64_to_cpu(dip->di_size);
				+			if (unlikely(di_size < 0 ||
				+				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
				+				xfs_warn(ip->i_mount,
				+			"corrupt inode %Lu (bad size %Ld for local inode).",
				+					(unsigned long long) ip->i_ino,
				+					(long long) di_size);
				+				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
				+						     XFS_ERRLEVEL_LOW,
				+						     ip->i_mount, dip);
				+				return XFS_ERROR(EFSCORRUPTED);
				+			}
				+
				+			/* Range-checked above, so the narrowing is safe. */
				+			size = (int)di_size;
				+			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
				+			break;
				+		case XFS_DINODE_FMT_EXTENTS:
				+			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
				+			break;
				+		case XFS_DINODE_FMT_BTREE:
				+			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
				+			break;
				+		default:
				+			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
				+					 ip->i_mount);
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+		break;
				+
				+	default:
				+		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+	if (error) {
				+		return error;
				+	}
				+
				+	/* Nothing more to do when the inode has no attribute fork. */
				+	if (!XFS_DFORK_Q(dip))
				+		return 0;
				+
				+	ASSERT(ip->i_afp == NULL);
				+	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
				+
				+	switch (dip->di_aformat) {
				+	case XFS_DINODE_FMT_LOCAL:
				+		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
				+		size = be16_to_cpu(atp->hdr.totsize);
				+
				+		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
				+			xfs_warn(ip->i_mount,
				+				"corrupt inode %Lu (bad attr fork size %Ld).",
				+				(unsigned long long) ip->i_ino,
				+				(long long) size);
				+			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
				+					     XFS_ERRLEVEL_LOW,
				+					     ip->i_mount, dip);
				+			/*
				+			 * Go through the common cleanup below rather than
				+			 * returning directly, so that ip->i_afp is released
				+			 * exactly as for every other attr fork failure.
				+			 */
				+			error = XFS_ERROR(EFSCORRUPTED);
				+			break;
				+		}
				+
				+		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
				+		break;
				+	case XFS_DINODE_FMT_EXTENTS:
				+		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
				+		break;
				+	case XFS_DINODE_FMT_BTREE:
				+		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
				+		break;
				+	default:
				+		error = XFS_ERROR(EFSCORRUPTED);
				+		break;
				+	}
				+	if (error) {
				+		/* Undo the attr fork allocation and the data fork setup. */
				+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
				+		ip->i_afp = NULL;
				+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
				+	}
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Bring a local-format (in-inode) fork into core.
				+ *
				+ * An empty fork gets a NULL if_data.  Data small enough for
				+ * if_inline_data is stored there; anything larger goes into a
				+ * kmem_alloc()ed buffer whose size is rounded up to a multiple of 4
				+ * and recorded in if_real_bytes.  In every case if_bytes holds the
				+ * caller-supplied size and the fork is flagged XFS_IFINLINE.
				+ *
				+ * Returns 0, or EFSCORRUPTED if size exceeds the on-disk fork size.
				+ */
				+STATIC int
				+xfs_iformat_local(
				+	xfs_inode_t	*ip,
				+	xfs_dinode_t	*dip,
				+	int		whichfork,
				+	int		size)
				+{
				+	xfs_ifork_t	*ifp;
				+	int		alloc_bytes = 0;
				+
				+	/*
				+	 * Refuse a size that cannot fit in the fork instead of
				+	 * crashing in kmem_alloc() or memcpy() further down.
				+	 */
				+	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
				+		xfs_warn(ip->i_mount,
				+	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
				+			(unsigned long long) ip->i_ino, size,
				+			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
				+		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
				+				     ip->i_mount, dip);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	if (size == 0) {
				+		ifp->if_u1.if_data = NULL;
				+	} else if (size <= sizeof(ifp->if_u2.if_inline_data)) {
				+		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
				+	} else {
				+		alloc_bytes = roundup(size, 4);
				+		ifp->if_u1.if_data = kmem_alloc(alloc_bytes,
				+						KM_SLEEP | KM_NOFS);
				+	}
				+	ifp->if_bytes = size;
				+	ifp->if_real_bytes = alloc_bytes;
				+
				+	if (size != 0)
				+		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
				+
				+	ifp->if_flags &= ~XFS_IFEXTENTS;
				+	ifp->if_flags |= XFS_IFINLINE;
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * The file consists of a set of extents all
				+ * of which fit into the on-disk inode.
				+ * If there are few enough extents to fit into
				+ * the if_inline_ext, then copy them there.
				+ * Otherwise allocate a buffer for them and copy
				+ * them into it.  Either way, set if_extents
				+ * to point at the extents.
				+ *
				+ * ip -- the in-core inode being populated
				+ * dip -- the on-disk inode holding the extent records
				+ * whichfork -- the fork (data or attr) being set up
				+ *
				+ * Returns 0 on success or EFSCORRUPTED if the extent count does not
				+ * fit the fork or the records fail xfs_check_nostate_extents().
				+ */
				+STATIC int
				+xfs_iformat_extents(
				+	xfs_inode_t	*ip,
				+	xfs_dinode_t	*dip,
				+	int		whichfork)
				+{
				+	xfs_bmbt_rec_t	*dp;
				+	xfs_ifork_t	*ifp;
				+	int		nex;
				+	int		size;
				+	int		i;
				+
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
				+	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
				+
				+	/*
				+	 * If the number of extents is unreasonable, then something
				+	 * is wrong and we just bail out rather than crash in
				+	 * kmem_alloc() or memcpy() below.
				+	 */
				+	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
				+		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
				+			(unsigned long long) ip->i_ino, nex);
				+		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				+				     ip->i_mount, dip);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	ifp->if_real_bytes = 0;
				+	if (nex == 0)
				+		ifp->if_u1.if_extents = NULL;
				+	else if (nex <= XFS_INLINE_EXTS)
				+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
				+	else
				+		xfs_iext_add(ifp, 0, nex);
				+
				+	ifp->if_bytes = size;
				+	if (size) {
				+		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
				+		/* Convert each on-disk (big-endian) record to host order. */
				+		for (i = 0; i < nex; i++, dp++) {
				+			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
				+			ep->l0 = get_unaligned_be64(&dp->l0);
				+			ep->l1 = get_unaligned_be64(&dp->l1);
				+		}
				+		/*
				+		 * Validate only once the in-core records have been
				+		 * filled in; before the copy loop they do not yet
				+		 * hold this inode's extents.
				+		 */
				+		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
				+		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
				+		if (whichfork != XFS_DATA_FORK ||
				+		    XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) {
				+			if (unlikely(xfs_check_nostate_extents(
				+			    ifp, 0, nex))) {
				+				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
				+						 XFS_ERRLEVEL_LOW,
				+						 ip->i_mount);
				+				return XFS_ERROR(EFSCORRUPTED);
				+			}
				+		}
				+	}
				+	ifp->if_flags |= XFS_IFEXTENTS;
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Set up a fork whose extents are kept in B-tree format because
				+ * they do not fit in the inode.  Only the root block is handled
				+ * here: an in-core buffer is allocated for it and the on-disk
				+ * root is converted into that buffer with xfs_bmdr_to_bmbt().
				+ * XFS_IFEXTENTS is cleared, so the extent list itself is not
				+ * in core yet (see xfs_iread_extents()).
				+ *
				+ * Returns 0 on success or EFSCORRUPTED if the fork fails the
				+ * consistency checks below.
				+ */
				+STATIC int
				+xfs_iformat_btree(
				+	xfs_inode_t		*ip,
				+	xfs_dinode_t		*dip,
				+	int			whichfork)
				+{
				+	struct xfs_mount	*mp = ip->i_mount;
				+	xfs_ifork_t		*ifp = XFS_IFORK_PTR(ip, whichfork);
				+	xfs_bmdr_block_t	*dfp;
				+	int			root_bytes;
				+	/* REFERENCED */
				+	int			nrecs;
				+
				+	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
				+	root_bytes = XFS_BMAP_BROOT_SPACE(mp, dfp);
				+	nrecs = be16_to_cpu(dfp->bb_numrecs);
				+
				+	/* So few extents that the fork should not be in btree format. */
				+	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
				+		     XFS_IFORK_MAXEXT(ip, whichfork)))
				+		goto corrupt;
				+	/* Root block claims more records than the fork can hold. */
				+	if (unlikely(XFS_BMDR_SPACE_CALC(nrecs) >
				+		     XFS_DFORK_SIZE(dip, mp, whichfork)))
				+		goto corrupt;
				+	/* More extents than blocks. */
				+	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks))
				+		goto corrupt;
				+
				+	ifp->if_broot_bytes = root_bytes;
				+	ifp->if_broot = kmem_alloc(root_bytes, KM_SLEEP | KM_NOFS);
				+	ASSERT(ifp->if_broot != NULL);
				+
				+	/* Convert the on-disk root into its in-memory layout. */
				+	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, mp, whichfork),
				+			 ifp->if_broot, root_bytes);
				+	ifp->if_flags &= ~XFS_IFEXTENTS;
				+	ifp->if_flags |= XFS_IFBROOT;
				+	return 0;
				+
				+corrupt:
				+	xfs_warn(mp, "corrupt inode %Lu (btree).",
				+		 (unsigned long long) ip->i_ino);
				+	XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
				+			     mp, dip);
				+	return XFS_ERROR(EFSCORRUPTED);
				+}
			
 
				+
			
 
				+/*
				+ * Pull the full extent list of a btree-format fork into core.
				+ * The in-core list is sized here with xfs_iext_add(); filling it in
				+ * is left to xfs_bmap_read_extents().  If that fails the list is
				+ * destroyed again and XFS_IFEXTENTS is cleared.
				+ *
				+ * Returns 0 on success, EFSCORRUPTED if the fork is not in btree
				+ * format, or the error from xfs_bmap_read_extents().
				+ */
				+int
				+xfs_iread_extents(
				+	xfs_trans_t	*tp,
				+	xfs_inode_t	*ip,
				+	int		whichfork)
				+{
				+	xfs_ifork_t	*ifp;
				+	xfs_extnum_t	nextents;
				+	int		error;
				+
				+	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
				+		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
				+				 ip->i_mount);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
				+
				+	/*
				+	 * The extent count needs no re-checking here; it was validated
				+	 * in iformat_btree.
				+	 */
				+	ifp->if_real_bytes = 0;
				+	ifp->if_bytes = 0;
				+	ifp->if_flags |= XFS_IFEXTENTS;
				+	xfs_iext_add(ifp, 0, nextents);
				+
				+	error = xfs_bmap_read_extents(tp, ip, whichfork);
				+	if (!error) {
				+		xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
				+		return 0;
				+	}
				+
				+	xfs_iext_destroy(ifp);
				+	ifp->if_flags &= ~XFS_IFEXTENTS;
				+	return error;
				+}
			
 
				+/*
				+ * Reallocate the space for if_broot based on the number of records
				+ * being added or deleted as indicated in rec_diff.  Move the records
				+ * and pointers in if_broot to fit the new size.  When shrinking this
				+ * will eliminate holes between the records and pointers created by
				+ * the caller.  When growing this will create holes to be filled in
				+ * by the caller.
				+ *
				+ * The caller must not request to add more records than would fit in
				+ * the on-disk inode root.  If the if_broot is currently NULL, then
				+ * if we are adding records, one will be allocated.  The caller must also
				+ * not request that the number of records go below zero, although
				+ * it can go to zero.
				+ *
				+ * ip -- the inode whose if_broot area is changing
				+ * rec_diff -- the change in the number of records, positive or negative,
				+ *	 requested for the if_broot array.
				+ * whichfork -- selects, via XFS_IFORK_PTR(), the fork whose if_broot
				+ *	 is resized.
				+ */
				+void
				+xfs_iroot_realloc(
				+	xfs_inode_t		*ip,
				+	int			rec_diff,
				+	int			whichfork)
				+{
				+	struct xfs_mount	*mp = ip->i_mount;
				+	int			cur_max;
				+	xfs_ifork_t		*ifp;
				+	struct xfs_btree_block	*new_broot;
				+	int			new_max;
				+	size_t			new_size;
				+	char			*np;
				+	char			*op;
				+
				+	/*
				+	 * Handle the degenerate case quietly.
				+	 */
				+	if (rec_diff == 0) {
				+		return;
				+	}
				+
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	if (rec_diff > 0) {
				+		/*
				+		 * If there wasn't any memory allocated before, just
				+		 * allocate it now and get out.  The new buffer is left
				+		 * uninitialised here; nothing is copied into it.
				+		 */
				+		if (ifp->if_broot_bytes == 0) {
				+			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
				+			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
				+			ifp->if_broot_bytes = (int)new_size;
				+			return;
				+		}
				+
				+		/*
				+		 * If there is already an existing if_broot, then we need
				+		 * to realloc() it and shift the pointers to their new
				+		 * location.  The records don't change location because
				+		 * they are kept butted up against the btree block header.
				+		 *
				+		 * op is computed from the old if_broot_bytes and np from
				+		 * the new size, both inside the reallocated buffer, which
				+		 * is why memmove() is used for the shift.
				+		 */
				+		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
				+		new_max = cur_max + rec_diff;
				+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
				+		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
				+				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
				+				KM_SLEEP | KM_NOFS);
				+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
				+						     ifp->if_broot_bytes);
				+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
				+						     (int)new_size);
				+		ifp->if_broot_bytes = (int)new_size;
				+		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
				+			XFS_IFORK_SIZE(ip, whichfork));
				+		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
				+		return;
				+	}
				+
				+	/*
				+	 * rec_diff is less than 0.  In this case, we are shrinking the
				+	 * if_broot buffer.  It must already exist.  If we go to zero
				+	 * records, just get rid of the root and clear the status bit.
				+	 */
				+	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
				+	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
				+	new_max = cur_max + rec_diff;
				+	ASSERT(new_max >= 0);
				+	if (new_max > 0)
				+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
				+	else
				+		new_size = 0;
				+	if (new_size > 0) {
				+		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
				+		/*
				+		 * First copy over the btree block header.
				+		 */
				+		memcpy(new_broot, ifp->if_broot,
				+			XFS_BMBT_BLOCK_LEN(ip->i_mount));
				+	} else {
				+		new_broot = NULL;
				+		ifp->if_flags &= ~XFS_IFBROOT;
				+	}
				+
				+	/*
				+	 * Only copy the records and pointers if there are any.
				+	 */
				+	if (new_max > 0) {
				+		/*
				+		 * First copy the records.
				+		 */
				+		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
				+		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
				+		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
				+
				+		/*
				+		 * Then copy the pointers.  The source address is taken
				+		 * from the old buffer using its old size, the destination
				+		 * from the new buffer using the new size.
				+		 */
				+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
				+						     ifp->if_broot_bytes);
				+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
				+						     (int)new_size);
				+		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
				+	}
				+	kmem_free(ifp->if_broot);
				+	ifp->if_broot = new_broot;
				+	ifp->if_broot_bytes = (int)new_size;
				+	if (ifp->if_broot)
				+		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
				+			XFS_IFORK_SIZE(ip, whichfork));
				+	return;
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * Resize the local data area (if_data) of a fork by byte_diff bytes.
				+ *
				+ * The resulting size decides where the data lives: zero bytes means
				+ * no buffer at all, anything that fits uses the embedded
				+ * if_inline_data array, and larger sizes use a heap buffer obtained
				+ * from kmem_alloc()/kmem_realloc().  Existing contents are carried
				+ * over whenever the storage location changes.
				+ *
				+ * ip -- the inode whose if_data area is changing
				+ * byte_diff -- signed change in the number of bytes needed
				+ * whichfork -- the fork whose if_data is resized
				+ */
				+void
				+xfs_idata_realloc(
				+	xfs_inode_t	*ip,
				+	int		byte_diff,
				+	int		whichfork)
				+{
				+	xfs_ifork_t	*ifp;
				+	int		new_size;
				+	int		real_size = 0;
				+
				+	if (byte_diff == 0)
				+		return;
				+
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
				+	new_size = (int)ifp->if_bytes + byte_diff;
				+	ASSERT(new_size >= 0);
				+
				+	if (new_size == 0) {
				+		/* Nothing left: release any heap buffer. */
				+		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data)
				+			kmem_free(ifp->if_u1.if_data);
				+		ifp->if_u1.if_data = NULL;
				+	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
				+		/*
				+		 * The data fits inline.  If it currently sits in a heap
				+		 * buffer, move the surviving bytes over and free it.
				+		 */
				+		if (ifp->if_u1.if_data != NULL &&
				+		    ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
				+			ASSERT(ifp->if_real_bytes != 0);
				+			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
				+			       new_size);
				+			kmem_free(ifp->if_u1.if_data);
				+		}
				+		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
				+	} else {
				+		/*
				+		 * A heap buffer is required.  Its size is kept at a
				+		 * multiple of 4 bytes so that it can be logged and
				+		 * stay on word boundaries.
				+		 */
				+		real_size = roundup(new_size, 4);
				+		if (ifp->if_u1.if_data == ifp->if_u2.if_inline_data) {
				+			/* Moving out of the inline array. */
				+			ASSERT(ifp->if_real_bytes == 0);
				+			ifp->if_u1.if_data = kmem_alloc(real_size,
				+							KM_SLEEP | KM_NOFS);
				+			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
				+			       ifp->if_bytes);
				+		} else if (ifp->if_u1.if_data == NULL) {
				+			ASSERT(ifp->if_real_bytes == 0);
				+			ifp->if_u1.if_data = kmem_alloc(real_size,
				+							KM_SLEEP | KM_NOFS);
				+		} else if (ifp->if_real_bytes != real_size) {
				+			/* Already on the heap; resize only on a real change. */
				+			ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
				+							  real_size,
				+							  ifp->if_real_bytes,
				+							  KM_SLEEP | KM_NOFS);
				+		}
				+	}
				+
				+	ifp->if_real_bytes = real_size;
				+	ifp->if_bytes = new_size;
				+	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
				+}
			
 
				+
			
 
				+/*
				+ * Release the in-core memory attached to one fork of an inode:
				+ * the btree root, then either the heap-allocated local data or the
				+ * extent list, depending on the fork format.  For the attribute
				+ * fork the fork structure itself is also returned to xfs_ifork_zone
				+ * and ip->i_afp is cleared.
				+ */
				+void
				+xfs_idestroy_fork(
				+	xfs_inode_t	*ip,
				+	int		whichfork)
				+{
				+	xfs_ifork_t	*ifp = XFS_IFORK_PTR(ip, whichfork);
				+
				+	if (ifp->if_broot) {
				+		kmem_free(ifp->if_broot);
				+		ifp->if_broot = NULL;
				+	}
				+
				+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
				+		/*
				+		 * A local fork has no extent list; only a data buffer
				+		 * that lives outside the inline array needs freeing.
				+		 */
				+		if (ifp->if_u1.if_data != NULL &&
				+		    ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
				+			ASSERT(ifp->if_real_bytes != 0);
				+			kmem_free(ifp->if_u1.if_data);
				+			ifp->if_u1.if_data = NULL;
				+			ifp->if_real_bytes = 0;
				+		}
				+	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
				+		   ((ifp->if_flags & XFS_IFEXTIREC) ||
				+		    (ifp->if_u1.if_extents != NULL &&
				+		     ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext))) {
				+		/*
				+		 * A non-local fork may or may not have an extent list
				+		 * in core; free it when one is present.
				+		 */
				+		ASSERT(ifp->if_real_bytes != 0);
				+		xfs_iext_destroy(ifp);
				+	}
				+
				+	ASSERT(ifp->if_u1.if_extents == NULL ||
				+	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
				+	ASSERT(ifp->if_real_bytes == 0);
				+
				+	if (whichfork == XFS_ATTR_FORK) {
				+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
				+		ip->i_afp = NULL;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * xfs_iextents_copy()
				+ *
				+ * Copy the REAL extents of a fork (as opposed to the delayed
				+ * allocation extents) from the in-core list into the buffer at dp,
				+ * converting each record to its big-endian on-disk form.  Every
				+ * in-core record is examined in turn and those whose start block
				+ * satisfies isnullstartblock() are skipped.
				+ *
				+ * Returns the number of bytes written to dp.
				+ */
				+int
				+xfs_iextents_copy(
				+	xfs_inode_t		*ip,
				+	xfs_bmbt_rec_t		*dp,
				+	int			whichfork)
				+{
				+	xfs_ifork_t		*ifp = XFS_IFORK_PTR(ip, whichfork);
				+	xfs_fsblock_t		start_block;
				+	int			nrecs;
				+	int			copied = 0;
				+	int			i;
				+
				+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
				+	ASSERT(ifp->if_bytes > 0);
				+
				+	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
				+	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
				+	ASSERT(nrecs > 0);
				+
				+	/*
				+	 * Walk every in-core extent; at least one of them must be a
				+	 * real (non-delayed) extent.
				+	 */
				+	for (i = 0; i < nrecs; i++) {
				+		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
				+
				+		start_block = xfs_bmbt_get_startblock(ep);
				+		if (!isnullstartblock(start_block)) {
				+			/* Real extent: translate to on disk format. */
				+			put_unaligned_be64(ep->l0, &dp->l0);
				+			put_unaligned_be64(ep->l1, &dp->l1);
				+			dp++;
				+			copied++;
				+		}
				+	}
				+	ASSERT(copied != 0);
				+	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
				+
				+	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Each of the following cases stores data into the same region
			
 
				+ * of the on-disk inode, so only one of them can be valid at
			
 
				+ * any given time. While it is possible to have conflicting formats
			
 
				+ * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
			
 
				+ * in EXTENTS format, this can only happen when the fork has
			
 
				+ * changed formats after being modified but before being flushed.
			
 
				+ * In these cases, the format always takes precedence, because the
			
 
				+ * format indicates the current state of the fork.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iflush_fork(
			
 
				+	xfs_inode_t		*ip,
			
 
				+	xfs_dinode_t		*dip,
			
 
				+	xfs_inode_log_item_t	*iip,
			
 
				+	int			whichfork,
			
 
				+	xfs_buf_t		*bp)
			
 
				+{
			
 
				+	char			*cp;
			
 
				+	xfs_ifork_t		*ifp;
			
 
				+	xfs_mount_t		*mp;
			
 
				+	static const short	brootflag[2] =
			
 
				+		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
			
 
				+	static const short	dataflag[2] =
			
 
				+		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
			
 
				+	static const short	extflag[2] =
			
 
				+		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
			
 
				+
			
 
				+	if (!iip)
			
 
				+		return;
			
 
				+	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				+	/*
			
 
				+	 * This can happen if we gave up in iformat in an error path,
			
 
				+	 * for the attribute fork.
			
 
				+	 */
			
 
				+	if (!ifp) {
			
 
				+		ASSERT(whichfork == XFS_ATTR_FORK);
			
 
				+		return;
			
 
				+	}
			
 
				+	cp = XFS_DFORK_PTR(dip, whichfork);
			
 
				+	mp = ip->i_mount;
			
 
				+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
			
 
				+	case XFS_DINODE_FMT_LOCAL:
			
 
				+		if ((iip->ili_fields & dataflag[whichfork]) &&
			
 
				+		    (ifp->if_bytes > 0)) {
			
 
				+			ASSERT(ifp->if_u1.if_data != NULL);
			
 
				+			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			
 
				+			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_DINODE_FMT_EXTENTS:
			
 
				+		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
			
 
				+		       !(iip->ili_fields & extflag[whichfork]));
			
 
				+		if ((iip->ili_fields & extflag[whichfork]) &&
			
 
				+		    (ifp->if_bytes > 0)) {
			
 
				+			ASSERT(xfs_iext_get_ext(ifp, 0));
			
 
				+			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
			
 
				+			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
			
 
				+				whichfork);
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_DINODE_FMT_BTREE:
			
 
				+		if ((iip->ili_fields & brootflag[whichfork]) &&
			
 
				+		    (ifp->if_broot_bytes > 0)) {
			
 
				+			ASSERT(ifp->if_broot != NULL);
			
 
				+			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			
 
				+			        XFS_IFORK_SIZE(ip, whichfork));
			
 
				+			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
			
 
				+				(xfs_bmdr_block_t *)cp,
			
 
				+				XFS_DFORK_SIZE(dip, mp, whichfork));
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_DINODE_FMT_DEV:
			
 
				+		if (iip->ili_fields & XFS_ILOG_DEV) {
			
 
				+			ASSERT(whichfork == XFS_DATA_FORK);
			
 
				+			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+	case XFS_DINODE_FMT_UUID:
			
 
				+		if (iip->ili_fields & XFS_ILOG_UUID) {
			
 
				+			ASSERT(whichfork == XFS_DATA_FORK);
			
 
				+			memcpy(XFS_DFORK_DPTR(dip),
			
 
				+			       &ip->i_df.if_u2.if_uuid,
			
 
				+			       sizeof(uuid_t));
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+	default:
			
 
				+		ASSERT(0);
			
 
				+		break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return a pointer to the extent record at file index idx.
			
 
				+ */
			
 
				+xfs_bmbt_rec_host_t *
			
 
				+xfs_iext_get_ext(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	idx)		/* index of target extent */
			
 
				+{
			
 
				+	ASSERT(idx >= 0);
			
 
				+	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
			
 
				+
			
 
				+	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
			
 
				+		return ifp->if_u1.if_ext_irec->er_extbuf;
			
 
				+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+		xfs_ext_irec_t	*erp;		/* irec pointer */
			
 
				+		int		erp_idx = 0;	/* irec index */
			
 
				+		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
			
 
				+
			
 
				+		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
			
 
				+		return &erp->er_extbuf[page_idx];
			
 
				+	} else if (ifp->if_bytes) {
			
 
				+		return &ifp->if_u1.if_extents[idx];
			
 
				+	} else {
			
 
				+		return NULL;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Insert new item(s) into the extent records for incore inode
			
 
				+ * fork 'ifp'.  'count' new items are inserted at index 'idx'.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_insert(
			
 
				+	xfs_inode_t	*ip,		/* incore inode pointer */
			
 
				+	xfs_extnum_t	idx,		/* starting index of new items */
			
 
				+	xfs_extnum_t	count,		/* number of inserted items */
			
 
				+	xfs_bmbt_irec_t	*new,		/* items to insert */
			
 
				+	int		state)		/* type of extent conversion */
			
 
				+{
			
 
				+	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
			
 
				+	xfs_extnum_t	i;		/* extent record index */
			
 
				+
			
 
				+	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
			
 
				+	xfs_iext_add(ifp, idx, count);
			
 
				+	for (i = idx; i < idx + count; i++, new++)
			
 
				+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called when the amount of space required for incore file
			
 
				+ * extents needs to be increased. The ext_diff parameter stores the
			
 
				+ * number of new extents being added and the idx parameter contains
			
 
				+ * the extent index where the new extents will be added. If the new
			
 
				+ * extents are being appended, then we just need to (re)allocate and
			
 
				+ * initialize the space. Otherwise, if the new extents are being
			
 
				+ * inserted into the middle of the existing entries, a bit more work
			
 
				+ * is required to make room for the new extents to be inserted. The
			
 
				+ * caller is responsible for filling in the new extent entries upon
			
 
				+ * return.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_add(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	idx,		/* index to begin adding exts */
			
 
				+	int		ext_diff)	/* number of extents to add */
			
 
				+{
			
 
				+	int		byte_diff;	/* new bytes being added */
			
 
				+	int		new_size;	/* size of extents after adding */
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	ASSERT((idx >= 0) && (idx <= nextents));
			
 
				+	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
			
 
				+	new_size = ifp->if_bytes + byte_diff;
			
 
				+	/*
			
 
				+	 * If the new number of extents (nextents + ext_diff)
			
 
				+	 * fits inside the inode, then continue to use the inline
			
 
				+	 * extent buffer.
			
 
				+	 */
			
 
				+	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
			
 
				+		if (idx < nextents) {
			
 
				+			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
			
 
				+				&ifp->if_u2.if_inline_ext[idx],
			
 
				+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			
 
				+			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
			
 
				+		}
			
 
				+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
			
 
				+		ifp->if_real_bytes = 0;
			
 
				+	}
			
 
				+	/*
			
 
				+	 * Otherwise use a linear (direct) extent list.
			
 
				+	 * If the extents are currently inside the inode,
			
 
				+	 * xfs_iext_realloc_direct will switch us from
			
 
				+	 * inline to direct extent allocation mode.
			
 
				+	 */
			
 
				+	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
			
 
				+		xfs_iext_realloc_direct(ifp, new_size);
			
 
				+		if (idx < nextents) {
			
 
				+			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
			
 
				+				&ifp->if_u1.if_extents[idx],
			
 
				+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			
 
				+			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
			
 
				+		}
			
 
				+	}
			
 
				+	/* Indirection array */
			
 
				+	else {
			
 
				+		xfs_ext_irec_t	*erp;
			
 
				+		int		erp_idx = 0;
			
 
				+		int		page_idx = idx;
			
 
				+
			
 
				+		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
			
 
				+		if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
			
 
				+		} else {
			
 
				+			xfs_iext_irec_init(ifp);
			
 
				+			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+			erp = ifp->if_u1.if_ext_irec;
			
 
				+		}
			
 
				+		/* Extents fit in target extent page */
			
 
				+		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
			
 
				+			if (page_idx < erp->er_extcount) {
			
 
				+				memmove(&erp->er_extbuf[page_idx + ext_diff],
			
 
				+					&erp->er_extbuf[page_idx],
			
 
				+					(erp->er_extcount - page_idx) *
			
 
				+					sizeof(xfs_bmbt_rec_t));
			
 
				+				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
			
 
				+			}
			
 
				+			erp->er_extcount += ext_diff;
			
 
				+			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				+		}
			
 
				+		/* Insert a new extent page */
			
 
				+		else if (erp) {
			
 
				+			xfs_iext_add_indirect_multi(ifp,
			
 
				+				erp_idx, page_idx, ext_diff);
			
 
				+		}
			
 
				+		/*
			
 
				+		 * If extent(s) are being appended to the last page in
			
 
				+		 * the indirection array and the new extent(s) don't fit
			
 
				+		 * in the page, then erp is NULL and erp_idx is set to
			
 
				+		 * the next index needed in the indirection array.
			
 
				+		 */
			
 
				+		else {
			
 
				+			int	count = ext_diff;
			
 
				+
			
 
				+			while (count) {
			
 
				+				erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				+				erp->er_extcount = count;
			
 
				+				count -= MIN(count, (int)XFS_LINEAR_EXTS);
			
 
				+				if (count) {
			
 
				+					erp_idx++;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	ifp->if_bytes = new_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called when incore extents are being added to the indirection
			
 
				+ * array and the new extents do not fit in the target extent list. The
			
 
				+ * erp_idx parameter contains the irec index for the target extent list
			
 
				+ * in the indirection array, and the idx parameter contains the extent
			
 
				+ * index within the list. The number of extents being added is stored
			
 
				+ * in the count parameter.
			
 
				+ *
			
 
				+ *    |-------|   |-------|
			
 
				+ *    |       |   |       |    idx - number of extents before idx
			
 
				+ *    |  idx  |   | count |
			
 
				+ *    |       |   |       |    count - number of extents being inserted at idx
			
 
				+ *    |-------|   |-------|
			
 
				+ *    | count |   | nex2  |    nex2 - number of extents after idx + count
			
 
				+ *    |-------|   |-------|
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_add_indirect_multi(
			
 
				+	xfs_ifork_t	*ifp,			/* inode fork pointer */
			
 
				+	int		erp_idx,		/* target extent irec index */
			
 
				+	xfs_extnum_t	idx,			/* index within target list */
			
 
				+	int		count)			/* new extents being added */
			
 
				+{
			
 
				+	int		byte_diff;		/* new bytes being added */
			
 
				+	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
			
 
				+	xfs_extnum_t	ext_diff;		/* number of extents to add */
			
 
				+	xfs_extnum_t	ext_cnt;		/* new extents still needed */
			
 
				+	xfs_extnum_t	nex2;			/* extents after idx + count */
			
 
				+	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
			
 
				+	int		nlists;			/* number of irec's (lists) */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+	nex2 = erp->er_extcount - idx;
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+
			
 
				+	/*
			
 
				+	 * Save second part of target extent list
			
 
				+	 * (all extents past */
			
 
				+	if (nex2) {
			
 
				+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
			
 
				+		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
			
 
				+		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
			
 
				+		erp->er_extcount -= nex2;
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
			
 
				+		memset(&erp->er_extbuf[idx], 0, byte_diff);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Add the new extents to the end of the target
			
 
				+	 * list, then allocate new irec record(s) and
			
 
				+	 * extent buffer(s) as needed to store the rest
			
 
				+	 * of the new extents.
			
 
				+	 */
			
 
				+	ext_cnt = count;
			
 
				+	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
			
 
				+	if (ext_diff) {
			
 
				+		erp->er_extcount += ext_diff;
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				+		ext_cnt -= ext_diff;
			
 
				+	}
			
 
				+	while (ext_cnt) {
			
 
				+		erp_idx++;
			
 
				+		erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				+		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
			
 
				+		erp->er_extcount = ext_diff;
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
			
 
				+		ext_cnt -= ext_diff;
			
 
				+	}
			
 
				+
			
 
				+	/* Add nex2 extents back to indirection array */
			
 
				+	if (nex2) {
			
 
				+		xfs_extnum_t	ext_avail;
			
 
				+		int		i;
			
 
				+
			
 
				+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
			
 
				+		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
			
 
				+		i = 0;
			
 
				+		/*
			
 
				+		 * If nex2 extents fit in the current page, append
			
 
				+		 * nex2_ep after the new extents.
			
 
				+		 */
			
 
				+		if (nex2 <= ext_avail) {
			
 
				+			i = erp->er_extcount;
			
 
				+		}
			
 
				+		/*
			
 
				+		 * Otherwise, check if space is available in the
			
 
				+		 * next page.
			
 
				+		 */
			
 
				+		else if ((erp_idx < nlists - 1) &&
			
 
				+			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			
 
				+			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			
 
				+			erp_idx++;
			
 
				+			erp++;
			
 
				+			/* Create a hole for nex2 extents */
			
 
				+			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
			
 
				+				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
			
 
				+		}
			
 
				+		/*
			
 
				+		 * Final choice, create a new extent page for
			
 
				+		 * nex2 extents.
			
 
				+		 */
			
 
				+		else {
			
 
				+			erp_idx++;
			
 
				+			erp = xfs_iext_irec_new(ifp, erp_idx);
			
 
				+		}
			
 
				+		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
			
 
				+		kmem_free(nex2_ep);
			
 
				+		erp->er_extcount += nex2;
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called when the amount of space required for incore file
			
 
				+ * extents needs to be decreased. The ext_diff parameter stores the
			
 
				+ * number of extents to be removed and the idx parameter contains
			
 
				+ * the extent index where the extents will be removed from.
			
 
				+ *
			
 
				+ * If the amount of space needed has decreased below the linear
			
 
				+ * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
			
 
				+ * extent array.  Otherwise, use kmem_realloc() to adjust the
			
 
				+ * size to what is needed.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_remove(
			
 
				+	xfs_inode_t	*ip,		/* incore inode pointer */
			
 
				+	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				+	int		ext_diff,	/* number of extents to remove */
			
 
				+	int		state)		/* type of extent conversion */
			
 
				+{
			
 
				+	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+	int		new_size;	/* size of extents after removal */
			
 
				+
			
 
				+	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
			
 
				+
			
 
				+	ASSERT(ext_diff > 0);
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
			
 
				+
			
 
				+	if (new_size == 0) {
			
 
				+		xfs_iext_destroy(ifp);
			
 
				+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+		xfs_iext_remove_indirect(ifp, idx, ext_diff);
			
 
				+	} else if (ifp->if_real_bytes) {
			
 
				+		xfs_iext_remove_direct(ifp, idx, ext_diff);
			
 
				+	} else {
			
 
				+		xfs_iext_remove_inline(ifp, idx, ext_diff);
			
 
				+	}
			
 
				+	ifp->if_bytes = new_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This removes ext_diff extents from the inline buffer, beginning
			
 
				+ * at extent index idx.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_remove_inline(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				+	int		ext_diff)	/* number of extents to remove */
			
 
				+{
			
 
				+	int		nextents;	/* number of extents in file */
			
 
				+
			
 
				+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				+	ASSERT(idx < XFS_INLINE_EXTS);
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	ASSERT(((nextents - ext_diff) > 0) &&
			
 
				+		(nextents - ext_diff) < XFS_INLINE_EXTS);
			
 
				+
			
 
				+	if (idx + ext_diff < nextents) {
			
 
				+		memmove(&ifp->if_u2.if_inline_ext[idx],
			
 
				+			&ifp->if_u2.if_inline_ext[idx + ext_diff],
			
 
				+			(nextents - (idx + ext_diff)) *
			
 
				+			 sizeof(xfs_bmbt_rec_t));
			
 
				+		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
			
 
				+			0, ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				+	} else {
			
 
				+		memset(&ifp->if_u2.if_inline_ext[idx], 0,
			
 
				+			ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This removes ext_diff extents from a linear (direct) extent list,
			
 
				+ * beginning at extent index idx. If the extents are being removed
			
 
				+ * from the end of the list (ie. truncate) then we just need to re-
			
 
				+ * allocate the list to remove the extra space. Otherwise, if the
			
 
				+ * extents are being removed from the middle of the existing extent
			
 
				+ * entries, then we first need to move the extent records beginning
			
 
				+ * at idx + ext_diff up in the list to overwrite the records being
			
 
				+ * removed, then remove the extra space via kmem_realloc.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_remove_direct(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	idx,		/* index to begin removing exts */
			
 
				+	int		ext_diff)	/* number of extents to remove */
			
 
				+{
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+	int		new_size;	/* size of extents after removal */
			
 
				+
			
 
				+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				+	new_size = ifp->if_bytes -
			
 
				+		(ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+
			
 
				+	if (new_size == 0) {
			
 
				+		xfs_iext_destroy(ifp);
			
 
				+		return;
			
 
				+	}
			
 
				+	/* Move extents up in the list (if needed) */
			
 
				+	if (idx + ext_diff < nextents) {
			
 
				+		memmove(&ifp->if_u1.if_extents[idx],
			
 
				+			&ifp->if_u1.if_extents[idx + ext_diff],
			
 
				+			(nextents - (idx + ext_diff)) *
			
 
				+			 sizeof(xfs_bmbt_rec_t));
			
 
				+	}
			
 
				+	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
			
 
				+		0, ext_diff * sizeof(xfs_bmbt_rec_t));
			
 
				+	/*
			
 
				+	 * Reallocate the direct extent list. If the extents
			
 
				+	 * will fit inside the inode then xfs_iext_realloc_direct
			
 
				+	 * will switch from direct to inline extent allocation
			
 
				+	 * mode for us.
			
 
				+	 */
			
 
				+	xfs_iext_realloc_direct(ifp, new_size);
			
 
				+	ifp->if_bytes = new_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called when incore extents are being removed from the
			
 
				+ * indirection array and the extents being removed span multiple extent
			
 
				+ * buffers. The idx parameter contains the file extent index where we
			
 
				+ * want to begin removing extents, and the count parameter contains
			
 
				+ * how many extents need to be removed.
			
 
				+ *
			
 
				+ *    |-------|   |-------|
			
 
				+ *    | nex1  |   |       |    nex1 - number of extents before idx
			
 
				+ *    |-------|   | count |
			
 
				+ *    |       |   |       |    count - number of extents being removed at idx
			
 
				+ *    | count |   |-------|
			
 
				+ *    |       |   | nex2  |    nex2 - number of extents after idx + count
			
 
				+ *    |-------|   |-------|
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_remove_indirect(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	idx,		/* index to begin removing extents */
			
 
				+	int		count)		/* number of extents to remove */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				+	int		erp_idx = 0;	/* indirection array index */
			
 
				+	xfs_extnum_t	ext_cnt;	/* extents left to remove */
			
 
				+	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
			
 
				+	xfs_extnum_t	nex1;		/* number of extents before idx */
			
 
				+	xfs_extnum_t	nex2;		/* extents after idx + count */
			
 
				+	int		page_idx = idx;	/* index in target extent list */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
			
 
				+	ASSERT(erp != NULL);
			
 
				+	nex1 = page_idx;
			
 
				+	ext_cnt = count;
			
 
				+	while (ext_cnt) {
			
 
				+		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
			
 
				+		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
			
 
				+		/*
			
 
				+		 * Check for deletion of entire list;
			
 
				+		 * xfs_iext_irec_remove() updates extent offsets.
			
 
				+		 */
			
 
				+		if (ext_diff == erp->er_extcount) {
			
 
				+			xfs_iext_irec_remove(ifp, erp_idx);
			
 
				+			ext_cnt -= ext_diff;
			
 
				+			nex1 = 0;
			
 
				+			if (ext_cnt) {
			
 
				+				ASSERT(erp_idx < ifp->if_real_bytes /
			
 
				+					XFS_IEXT_BUFSZ);
			
 
				+				erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+				nex1 = 0;
			
 
				+				continue;
			
 
				+			} else {
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		/* Move extents up (if needed) */
			
 
				+		if (nex2) {
			
 
				+			memmove(&erp->er_extbuf[nex1],
			
 
				+				&erp->er_extbuf[nex1 + ext_diff],
			
 
				+				nex2 * sizeof(xfs_bmbt_rec_t));
			
 
				+		}
			
 
				+		/* Zero out rest of page */
			
 
				+		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			
 
				+			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
			
 
				+		/* Update remaining counters */
			
 
				+		erp->er_extcount -= ext_diff;
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
			
 
				+		ext_cnt -= ext_diff;
			
 
				+		nex1 = 0;
			
 
				+		erp_idx++;
			
 
				+		erp++;
			
 
				+	}
			
 
				+	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
			
 
				+	xfs_iext_irec_compact(ifp);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Create, destroy, or resize a linear (direct) block of extents.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_realloc_direct(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		new_size)	/* new size of extents */
			
 
				+{
			
 
				+	int		rnew_size;	/* real new size of extents */
			
 
				+
			
 
				+	rnew_size = new_size;
			
 
				+
			
 
				+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
			
 
				+		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
			
 
				+		 (new_size != ifp->if_real_bytes)));
			
 
				+
			
 
				+	/* Free extent records */
			
 
				+	if (new_size == 0) {
			
 
				+		xfs_iext_destroy(ifp);
			
 
				+	}
			
 
				+	/* Resize direct extent list and zero any new bytes */
			
 
				+	else if (ifp->if_real_bytes) {
			
 
				+		/* Check if extents will fit inside the inode */
			
 
				+		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
			
 
				+			xfs_iext_direct_to_inline(ifp, new_size /
			
 
				+				(uint)sizeof(xfs_bmbt_rec_t));
			
 
				+			ifp->if_bytes = new_size;
			
 
				+			return;
			
 
				+		}
			
 
				+		if (!is_power_of_2(new_size)){
			
 
				+			rnew_size = roundup_pow_of_two(new_size);
			
 
				+		}
			
 
				+		if (rnew_size != ifp->if_real_bytes) {
			
 
				+			ifp->if_u1.if_extents =
			
 
				+				kmem_realloc(ifp->if_u1.if_extents,
			
 
				+						rnew_size,
			
 
				+						ifp->if_real_bytes, KM_NOFS);
			
 
				+		}
			
 
				+		if (rnew_size > ifp->if_real_bytes) {
			
 
				+			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
			
 
				+				(uint)sizeof(xfs_bmbt_rec_t)], 0,
			
 
				+				rnew_size - ifp->if_real_bytes);
			
 
				+		}
			
 
				+	}
			
 
				+	/*
			
 
				+	 * Switch from the inline extent buffer to a direct
			
 
				+	 * extent list. Be sure to include the inline extent
			
 
				+	 * bytes in new_size.
			
 
				+	 */
			
 
				+	else {
			
 
				+		new_size += ifp->if_bytes;
			
 
				+		if (!is_power_of_2(new_size)) {
			
 
				+			rnew_size = roundup_pow_of_two(new_size);
			
 
				+		}
			
 
				+		xfs_iext_inline_to_direct(ifp, rnew_size);
			
 
				+	}
			
 
				+	ifp->if_real_bytes = rnew_size;
			
 
				+	ifp->if_bytes = new_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Switch from linear (direct) extent records to inline buffer.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_direct_to_inline(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	nextents)	/* number of extents in file */
			
 
				+{
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
			
 
				+	ASSERT(nextents <= XFS_INLINE_EXTS);
			
 
				+	/*
			
 
				+	 * The inline buffer was zeroed when we switched
			
 
				+	 * from inline to direct extent allocation mode,
			
 
				+	 * so we don't need to clear it here.
			
 
				+	 */
			
 
				+	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
			
 
				+		nextents * sizeof(xfs_bmbt_rec_t));
			
 
				+	kmem_free(ifp->if_u1.if_extents);
			
 
				+	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
			
 
				+	ifp->if_real_bytes = 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Switch from inline buffer to linear (direct) extent records.
			
 
				+ * new_size should already be rounded up to the next power of 2
			
 
				+ * by the caller (when appropriate), so use new_size as it is.
			
 
				+ * However, since new_size may be rounded up, we can't update
			
 
				+ * if_bytes here. It is the caller's responsibility to update
			
 
				+ * if_bytes upon return.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_inline_to_direct(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		new_size)	/* number of extents in file */
			
 
				+{
			
 
				+	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
			
 
				+	memset(ifp->if_u1.if_extents, 0, new_size);
			
 
				+	if (ifp->if_bytes) {
			
 
				+		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
			
 
				+			ifp->if_bytes);
			
 
				+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
			
 
				+			sizeof(xfs_bmbt_rec_t));
			
 
				+	}
			
 
				+	ifp->if_real_bytes = new_size;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Resize an extent indirection array to new_size bytes.
			
 
				+ */
			
 
				+STATIC void
			
 
				+xfs_iext_realloc_indirect(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		new_size)	/* new indirection array size */
			
 
				+{
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+	int		size;		/* current indirection array size */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	size = nlists * sizeof(xfs_ext_irec_t);
			
 
				+	ASSERT(ifp->if_real_bytes);
			
 
				+	ASSERT((new_size >= 0) && (new_size != size));
			
 
				+	if (new_size == 0) {
			
 
				+		xfs_iext_destroy(ifp);
			
 
				+	} else {
			
 
				+		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
			
 
				+			kmem_realloc(ifp->if_u1.if_ext_irec,
			
 
				+				new_size, size, KM_NOFS);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Switch from indirection array to linear (direct) extent allocations.
			
 
				+ */
			
 
				+STATIC void
			
 
				+xfs_iext_indirect_to_direct(
			
 
				+	 xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				+{
			
 
				+	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+	int		size;		/* size of file extents */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	ASSERT(nextents <= XFS_LINEAR_EXTS);
			
 
				+	size = nextents * sizeof(xfs_bmbt_rec_t);
			
 
				+
			
 
				+	xfs_iext_irec_compact_pages(ifp);
			
 
				+	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
			
 
				+
			
 
				+	ep = ifp->if_u1.if_ext_irec->er_extbuf;
			
 
				+	kmem_free(ifp->if_u1.if_ext_irec);
			
 
				+	ifp->if_flags &= ~XFS_IFEXTIREC;
			
 
				+	ifp->if_u1.if_extents = ep;
			
 
				+	ifp->if_bytes = size;
			
 
				+	if (nextents < XFS_LINEAR_EXTS) {
			
 
				+		xfs_iext_realloc_direct(ifp, size);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Free incore file extents.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_destroy(
			
 
				+	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				+{
			
 
				+	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+		int	erp_idx;
			
 
				+		int	nlists;
			
 
				+
			
 
				+		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
			
 
				+			xfs_iext_irec_remove(ifp, erp_idx);
			
 
				+		}
			
 
				+		ifp->if_flags &= ~XFS_IFEXTIREC;
			
 
				+	} else if (ifp->if_real_bytes) {
			
 
				+		kmem_free(ifp->if_u1.if_extents);
			
 
				+	} else if (ifp->if_bytes) {
			
 
				+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
			
 
				+			sizeof(xfs_bmbt_rec_t));
			
 
				+	}
			
 
				+	ifp->if_u1.if_extents = NULL;
			
 
				+	ifp->if_real_bytes = 0;
			
 
				+	ifp->if_bytes = 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return a pointer to the extent record for file system block bno.
			
 
				+ */
			
 
				+xfs_bmbt_rec_host_t *			/* pointer to found extent record */
			
 
				+xfs_iext_bno_to_ext(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_fileoff_t	bno,		/* block number to search for */
			
 
				+	xfs_extnum_t	*idxp)		/* index of target extent */
			
 
				+{
			
 
				+	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
			
 
				+	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
			
 
				+	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
			
 
				+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
			
 
				+	int		high;		/* upper boundary in search */
			
 
				+	xfs_extnum_t	idx = 0;	/* index of target extent */
			
 
				+	int		low;		/* lower boundary in search */
			
 
				+	xfs_extnum_t	nextents;	/* number of file extents */
			
 
				+	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
			
 
				+
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	if (nextents == 0) {
			
 
				+		*idxp = 0;
			
 
				+		return NULL;
			
 
				+	}
			
 
				+	low = 0;
			
 
				+	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+		/* Find target extent list */
			
 
				+		int	erp_idx = 0;
			
 
				+		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
			
 
				+		base = erp->er_extbuf;
			
 
				+		high = erp->er_extcount - 1;
			
 
				+	} else {
			
 
				+		base = ifp->if_u1.if_extents;
			
 
				+		high = nextents - 1;
			
 
				+	}
			
 
				+	/* Binary search extent records */
			
 
				+	while (low <= high) {
			
 
				+		idx = (low + high) >> 1;
			
 
				+		ep = base + idx;
			
 
				+		startoff = xfs_bmbt_get_startoff(ep);
			
 
				+		blockcount = xfs_bmbt_get_blockcount(ep);
			
 
				+		if (bno < startoff) {
			
 
				+			high = idx - 1;
			
 
				+		} else if (bno >= startoff + blockcount) {
			
 
				+			low = idx + 1;
			
 
				+		} else {
			
 
				+			/* Convert back to file-based extent index */
			
 
				+			if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+				idx += erp->er_extoff;
			
 
				+			}
			
 
				+			*idxp = idx;
			
 
				+			return ep;
			
 
				+		}
			
 
				+	}
			
 
				+	/* Convert back to file-based extent index */
			
 
				+	if (ifp->if_flags & XFS_IFEXTIREC) {
			
 
				+		idx += erp->er_extoff;
			
 
				+	}
			
 
				+	if (bno >= startoff + blockcount) {
			
 
				+		if (++idx == nextents) {
			
 
				+			ep = NULL;
			
 
				+		} else {
			
 
				+			ep = xfs_iext_get_ext(ifp, idx);
			
 
				+		}
			
 
				+	}
			
 
				+	*idxp = idx;
			
 
				+	return ep;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return a pointer to the indirection array entry containing the
			
 
				+ * extent record for filesystem block bno. Store the index of the
			
 
				+ * target irec in *erp_idxp.
			
 
				+ */
			
 
				+xfs_ext_irec_t *			/* pointer to found extent record */
			
 
				+xfs_iext_bno_to_irec(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_fileoff_t	bno,		/* block number to search for */
			
 
				+	int		*erp_idxp)	/* irec index of target ext list */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
			
 
				+	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
			
 
				+	int		erp_idx;	/* indirection array index */
			
 
				+	int		nlists;		/* number of extent irec's (lists) */
			
 
				+	int		high;		/* binary search upper limit */
			
 
				+	int		low;		/* binary search lower limit */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	erp_idx = 0;
			
 
				+	low = 0;
			
 
				+	high = nlists - 1;
			
 
				+	while (low <= high) {
			
 
				+		erp_idx = (low + high) >> 1;
			
 
				+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
			
 
				+		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			
 
				+			high = erp_idx - 1;
			
 
				+		} else if (erp_next && bno >=
			
 
				+			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			
 
				+			low = erp_idx + 1;
			
 
				+		} else {
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+	*erp_idxp = erp_idx;
			
 
				+	return erp;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return a pointer to the indirection array entry containing the
			
 
				+ * extent record at file extent index *idxp. Store the index of the
			
 
				+ * target irec in *erp_idxp and store the page index of the target
			
 
				+ * extent record in *idxp.
			
 
				+ */
			
 
				+xfs_ext_irec_t *
			
 
				+xfs_iext_idx_to_irec(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
			
 
				+	int		*erp_idxp,	/* pointer to target irec */
			
 
				+	int		realloc)	/* new bytes were just added */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
			
 
				+	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
			
 
				+	int		erp_idx;	/* indirection array index */
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+	int		high;		/* binary search upper limit */
			
 
				+	int		low;		/* binary search lower limit */
			
 
				+	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	ASSERT(page_idx >= 0);
			
 
				+	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
			
 
				+	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
			
 
				+
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	erp_idx = 0;
			
 
				+	low = 0;
			
 
				+	high = nlists - 1;
			
 
				+
			
 
				+	/* Binary search extent irec's */
			
 
				+	while (low <= high) {
			
 
				+		erp_idx = (low + high) >> 1;
			
 
				+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+		prev = erp_idx > 0 ? erp - 1 : NULL;
			
 
				+		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
			
 
				+		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			
 
				+			high = erp_idx - 1;
			
 
				+		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			
 
				+			   (page_idx == erp->er_extoff + erp->er_extcount &&
			
 
				+			    !realloc)) {
			
 
				+			low = erp_idx + 1;
			
 
				+		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			
 
				+			   erp->er_extcount == XFS_LINEAR_EXTS) {
			
 
				+			ASSERT(realloc);
			
 
				+			page_idx = 0;
			
 
				+			erp_idx++;
			
 
				+			erp = erp_idx < nlists ? erp + 1 : NULL;
			
 
				+			break;
			
 
				+		} else {
			
 
				+			page_idx -= erp->er_extoff;
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+	*idxp = page_idx;
			
 
				+	*erp_idxp = erp_idx;
			
 
				+	return(erp);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Allocate and initialize an indirection array once the space needed
			
 
				+ * for incore extents increases above XFS_IEXT_BUFSZ.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_irec_init(
			
 
				+	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+
			
 
				+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+	ASSERT(nextents <= XFS_LINEAR_EXTS);
			
 
				+
			
 
				+	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
			
 
				+
			
 
				+	if (nextents == 0) {
			
 
				+		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
			
 
				+	} else if (!ifp->if_real_bytes) {
			
 
				+		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
			
 
				+	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
			
 
				+		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
			
 
				+	}
			
 
				+	erp->er_extbuf = ifp->if_u1.if_extents;
			
 
				+	erp->er_extcount = nextents;
			
 
				+	erp->er_extoff = 0;
			
 
				+
			
 
				+	ifp->if_flags |= XFS_IFEXTIREC;
			
 
				+	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
			
 
				+	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
			
 
				+	ifp->if_u1.if_ext_irec = erp;
			
 
				+
			
 
				+	return;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Allocate and initialize a new entry in the indirection array.
			
 
				+ */
			
 
				+xfs_ext_irec_t *
			
 
				+xfs_iext_irec_new(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		erp_idx)	/* index for new irec */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				+	int		i;		/* loop counter */
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+
			
 
				+	/* Resize indirection array */
			
 
				+	xfs_iext_realloc_indirect(ifp, ++nlists *
			
 
				+				  sizeof(xfs_ext_irec_t));
			
 
				+	/*
			
 
				+	 * Move records down in the array so the
			
 
				+	 * new page can use erp_idx.
			
 
				+	 */
			
 
				+	erp = ifp->if_u1.if_ext_irec;
			
 
				+	for (i = nlists - 1; i > erp_idx; i--) {
			
 
				+		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
			
 
				+	}
			
 
				+	ASSERT(i == erp_idx);
			
 
				+
			
 
				+	/* Initialize new extent record */
			
 
				+	erp = ifp->if_u1.if_ext_irec;
			
 
				+	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
			
 
				+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
			
 
				+	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
			
 
				+	erp[erp_idx].er_extcount = 0;
			
 
				+	erp[erp_idx].er_extoff = erp_idx > 0 ?
			
 
				+		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
			
 
				+	return (&erp[erp_idx]);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Remove a record from the indirection array.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_irec_remove(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		erp_idx)	/* irec index to remove */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
			
 
				+	int		i;		/* loop counter */
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+	if (erp->er_extbuf) {
			
 
				+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			
 
				+			-erp->er_extcount);
			
 
				+		kmem_free(erp->er_extbuf);
			
 
				+	}
			
 
				+	/* Compact extent records */
			
 
				+	erp = ifp->if_u1.if_ext_irec;
			
 
				+	for (i = erp_idx; i < nlists - 1; i++) {
			
 
				+		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
			
 
				+	}
			
 
				+	/*
			
 
				+	 * Manually free the last extent record from the indirection
			
 
				+	 * array.  A call to xfs_iext_realloc_indirect() with a size
			
 
				+	 * of zero would result in a call to xfs_iext_destroy() which
			
 
				+	 * would in turn call this function again, creating a nasty
			
 
				+	 * infinite loop.
			
 
				+	 */
			
 
				+	if (--nlists) {
			
 
				+		xfs_iext_realloc_indirect(ifp,
			
 
				+			nlists * sizeof(xfs_ext_irec_t));
			
 
				+	} else {
			
 
				+		kmem_free(ifp->if_u1.if_ext_irec);
			
 
				+	}
			
 
				+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called to clean up large amounts of unused memory allocated
			
 
				+ * by the indirection array.  Before compacting anything though, verify
			
 
				+ * that the indirection array is still needed and switch back to the
			
 
				+ * linear extent list (or even the inline buffer) if possible.  The
			
 
				+ * compaction policy is as follows:
			
 
				+ *
			
 
				+ *    Full Compaction: Extents fit into a single page (or inline buffer)
			
 
				+ * Partial Compaction: Extents occupy less than 50% of allocated space
			
 
				+ *      No Compaction: Extents occupy at least 50% of allocated space
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_irec_compact(
			
 
				+	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				+{
			
 
				+	xfs_extnum_t	nextents;	/* number of extents in file */
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
			
 
				+
			
 
				+	if (nextents == 0) {
			
 
				+		xfs_iext_destroy(ifp);
			
 
				+	} else if (nextents <= XFS_INLINE_EXTS) {
			
 
				+		xfs_iext_indirect_to_direct(ifp);
			
 
				+		xfs_iext_direct_to_inline(ifp, nextents);
			
 
				+	} else if (nextents <= XFS_LINEAR_EXTS) {
			
 
				+		xfs_iext_indirect_to_direct(ifp);
			
 
				+	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
			
 
				+		xfs_iext_irec_compact_pages(ifp);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Combine extents from neighboring extent pages.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_irec_compact_pages(
			
 
				+	xfs_ifork_t	*ifp)		/* inode fork pointer */
			
 
				+{
			
 
				+	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
			
 
				+	int		erp_idx = 0;	/* indirection array index */
			
 
				+	int		nlists;		/* number of irec's (ex lists) */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	while (erp_idx < nlists - 1) {
			
 
				+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
			
 
				+		erp_next = erp + 1;
			
 
				+		if (erp_next->er_extcount <=
			
 
				+		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			
 
				+			memcpy(&erp->er_extbuf[erp->er_extcount],
			
 
				+				erp_next->er_extbuf, erp_next->er_extcount *
			
 
				+				sizeof(xfs_bmbt_rec_t));
			
 
				+			erp->er_extcount += erp_next->er_extcount;
			
 
				+			/*
			
 
				+			 * Free page before removing extent record
			
 
				+			 * so er_extoffs don't get modified in
			
 
				+			 * xfs_iext_irec_remove.
			
 
				+			 */
			
 
				+			kmem_free(erp_next->er_extbuf);
			
 
				+			erp_next->er_extbuf = NULL;
			
 
				+			xfs_iext_irec_remove(ifp, erp_idx + 1);
			
 
				+			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+		} else {
			
 
				+			erp_idx++;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * This is called to update the er_extoff field in the indirection
			
 
				+ * array when extents have been added or removed from one of the
			
 
				+ * extent lists. erp_idx contains the irec index to begin updating
			
 
				+ * at and ext_diff contains the number of extents that were added
			
 
				+ * or removed.
			
 
				+ */
			
 
				+void
			
 
				+xfs_iext_irec_update_extoffs(
			
 
				+	xfs_ifork_t	*ifp,		/* inode fork pointer */
			
 
				+	int		erp_idx,	/* irec index to update */
			
 
				+	int		ext_diff)	/* number of new extents */
			
 
				+{
			
 
				+	int		i;		/* loop counter */
			
 
				+	int		nlists;		/* number of irec's (ex lists */
			
 
				+
			
 
				+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			
 
				+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
			
 
				+	for (i = erp_idx; i < nlists; i++) {
			
 
				+		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
			
 
				+	}
			
 
				+}
			
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/xfs_inode_fork.h
@@ -0,0 +1,171 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef	__XFS_INODE_FORK_H__
			
 
				+#define	__XFS_INODE_FORK_H__
			
 
				+
			
 
				+struct xfs_inode_log_item;
			
 
				+
			
 
				+/*
			
 
				+ * The following xfs_ext_irec_t struct introduces a second (top) level
			
 
				+ * to the in-core extent allocation scheme. These structs are allocated
			
 
				+ * in a contiguous block, creating an indirection array where each entry
			
 
				+ * (irec) contains a pointer to a buffer of in-core extent records which
			
 
				+ * it manages. Each extent buffer is 4k in size, since 4k is the system
			
 
				+ * page size on Linux i386 and systems with larger page sizes don't seem
			
 
				+ * to gain much, if anything, by using their native page size as the
			
 
				+ * extent buffer size. Also, using 4k extent buffers everywhere provides
			
 
				+ * a consistent interface for CXFS across different platforms.
			
 
				+ *
			
 
				+ * There is currently no limit on the number of irec's (extent lists)
			
 
				+ * allowed, so heavily fragmented files may require an indirection array
			
 
				+ * which spans multiple system pages of memory. The number of extents
			
 
				+ * which would require this amount of contiguous memory is very large
			
 
				+ * and should not cause problems in the foreseeable future. However,
			
 
				+ * if the memory needed for the contiguous array ever becomes a problem,
			
 
				+ * it is possible that a third level of indirection may be required.
			
 
				+ */
			
 
				+typedef struct xfs_ext_irec {
			
 
				+	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
			
 
				+	xfs_extnum_t	er_extoff;	/* extent offset in file */
			
 
				+	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
			
 
				+} xfs_ext_irec_t;
			
 
				+
			
 
				+/*
			
 
				+ * File incore extent information, present for each of data & attr forks.
			
 
				+ */
			
 
				+#define	XFS_IEXT_BUFSZ		4096
			
 
				+#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
			
 
				+#define	XFS_INLINE_EXTS		2
			
 
				+#define	XFS_INLINE_DATA		32
			
 
				+typedef struct xfs_ifork {
			
 
				+	int			if_bytes;	/* bytes in if_u1 */
			
 
				+	int			if_real_bytes;	/* bytes allocated in if_u1 */
			
 
				+	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
			
 
				+	short			if_broot_bytes;	/* bytes allocated for root */
			
 
				+	unsigned char		if_flags;	/* per-fork flags */
			
 
				+	union {
			
 
				+		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
			
 
				+		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
			
 
				+		char		*if_data;	/* inline file data */
			
 
				+	} if_u1;
			
 
				+	union {
			
 
				+		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
			
 
				+						/* very small file extents */
			
 
				+		char		if_inline_data[XFS_INLINE_DATA];
			
 
				+						/* very small file data */
			
 
				+		xfs_dev_t	if_rdev;	/* dev number if special */
			
 
				+		uuid_t		if_uuid;	/* mount point value */
			
 
				+	} if_u2;
			
 
				+} xfs_ifork_t;
			
 
				+
			
 
				+/*
			
 
				+ * Per-fork incore inode flags.
			
 
				+ */
			
 
				+#define	XFS_IFINLINE	0x01	/* Inline data is read in */
			
 
				+#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
			
 
				+#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
			
 
				+#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
			
 
				+
			
 
				+/*
			
 
				+ * Fork handling.
			
 
				+ */
			
 
				+
			
 
				+#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
			
 
				+#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
			
 
				+
			
 
				+#define XFS_IFORK_PTR(ip,w)		\
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		&(ip)->i_df : \
			
 
				+		(ip)->i_afp)
			
 
				+#define XFS_IFORK_DSIZE(ip) \
			
 
				+	(XFS_IFORK_Q(ip) ? \
			
 
				+		XFS_IFORK_BOFF(ip) : \
			
 
				+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
			
 
				+#define XFS_IFORK_ASIZE(ip) \
			
 
				+	(XFS_IFORK_Q(ip) ? \
			
 
				+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
			
 
				+			XFS_IFORK_BOFF(ip) : \
			
 
				+		0)
			
 
				+#define XFS_IFORK_SIZE(ip,w) \
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		XFS_IFORK_DSIZE(ip) : \
			
 
				+		XFS_IFORK_ASIZE(ip))
			
 
				+#define XFS_IFORK_FORMAT(ip,w) \
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		(ip)->i_d.di_format : \
			
 
				+		(ip)->i_d.di_aformat)
			
 
				+#define XFS_IFORK_FMT_SET(ip,w,n) \
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		((ip)->i_d.di_format = (n)) : \
			
 
				+		((ip)->i_d.di_aformat = (n)))
			
 
				+#define XFS_IFORK_NEXTENTS(ip,w) \
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		(ip)->i_d.di_nextents : \
			
 
				+		(ip)->i_d.di_anextents)
			
 
				+#define XFS_IFORK_NEXT_SET(ip,w,n) \
			
 
				+	((w) == XFS_DATA_FORK ? \
			
 
				+		((ip)->i_d.di_nextents = (n)) : \
			
 
				+		((ip)->i_d.di_anextents = (n)))
			
 
				+#define XFS_IFORK_MAXEXT(ip, w) \
			
 
				+	(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
			
 
				+
			
 
				+int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
			
 
				+void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
			
 
				+				struct xfs_inode_log_item *, int,
			
 
				+				struct xfs_buf *);
			
 
				+void		xfs_idestroy_fork(struct xfs_inode *, int);
			
 
				+void		xfs_idata_realloc(struct xfs_inode *, int, int);
			
 
				+void		xfs_iroot_realloc(struct xfs_inode *, int, int);
			
 
				+int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
			
 
				+int		xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
			
 
				+				  int);
			
 
				+
			
 
				+struct xfs_bmbt_rec_host *
			
 
				+		xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
			
 
				+void		xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
			
 
				+				struct xfs_bmbt_irec *, int);
			
 
				+void		xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
			
 
				+void		xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
			
 
				+					    xfs_extnum_t, int);
			
 
				+void		xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
			
 
				+void		xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
			
 
				+void		xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
			
 
				+void		xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
			
 
				+void		xfs_iext_realloc_direct(struct xfs_ifork *, int);
			
 
				+void		xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
			
 
				+void		xfs_iext_inline_to_direct(struct xfs_ifork *, int);
			
 
				+void		xfs_iext_destroy(struct xfs_ifork *);
			
 
				+struct xfs_bmbt_rec_host *
			
 
				+		xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
			
 
				+struct xfs_ext_irec *
			
 
				+		xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
			
 
				+struct xfs_ext_irec *
			
 
				+		xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
			
 
				+				     int);
			
 
				+void		xfs_iext_irec_init(struct xfs_ifork *);
			
 
				+struct xfs_ext_irec *
			
 
				+		xfs_iext_irec_new(struct xfs_ifork *, int);
			
 
				+void		xfs_iext_irec_remove(struct xfs_ifork *, int);
			
 
				+void		xfs_iext_irec_compact(struct xfs_ifork *);
			
 
				+void		xfs_iext_irec_compact_pages(struct xfs_ifork *);
			
 
				+void		xfs_iext_irec_compact_full(struct xfs_ifork *);
			
 
				+void		xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
			
 
				+
			
 
				+extern struct kmem_zone	*xfs_ifork_zone;
			
 
				+
			
 
				+#endif	/* __XFS_INODE_FORK_H__ */
			
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -47,32 +47,44 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
 
				  * inode core, and possibly one for the inode data/extents/b-tree root
			
 
				  * and one for the inode attribute data/extents/b-tree root.
			
 
				  */
			
 
				-STATIC uint
			
 
				+STATIC void
			
 
				 xfs_inode_item_size(
			
 
				-	struct xfs_log_item	*lip)
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	int			*nvecs,
			
 
				+	int			*nbytes)
			
 
				 {
			
 
				 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
			
 
				 	struct xfs_inode	*ip = iip->ili_inode;
			
 
				-	uint			nvecs = 2;
			
 
				+
			
 
				+	*nvecs += 2;
			
 
				+	*nbytes += sizeof(struct xfs_inode_log_format) +
			
 
				+		   xfs_icdinode_size(ip->i_d.di_version);
			
 
				 
			
 
				 	switch (ip->i_d.di_format) {
			
 
				 	case XFS_DINODE_FMT_EXTENTS:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
			
 
				 		    ip->i_d.di_nextents > 0 &&
			
 
				-		    ip->i_df.if_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_df.if_bytes > 0) {
			
 
				+			/* worst case, doesn't subtract delalloc extents */
			
 
				+			*nbytes += XFS_IFORK_DSIZE(ip);
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	case XFS_DINODE_FMT_BTREE:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
			
 
				-		    ip->i_df.if_broot_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_df.if_broot_bytes > 0) {
			
 
				+			*nbytes += ip->i_df.if_broot_bytes;
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	case XFS_DINODE_FMT_LOCAL:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
			
 
				-		    ip->i_df.if_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_df.if_bytes > 0) {
			
 
				+			*nbytes += roundup(ip->i_df.if_bytes, 4);
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	case XFS_DINODE_FMT_DEV:
			
@@ -85,7 +97,7 @@ xfs_inode_item_size(
 
				 	}
			
 
				 
			
 
				 	if (!XFS_IFORK_Q(ip))
			
 
				-		return nvecs;
			
 
				+		return;
			
 
				 
			
 
				 
			
 
				 	/*
			
@@ -95,28 +107,33 @@ xfs_inode_item_size(
 
				 	case XFS_DINODE_FMT_EXTENTS:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
			
 
				 		    ip->i_d.di_anextents > 0 &&
			
 
				-		    ip->i_afp->if_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_afp->if_bytes > 0) {
			
 
				+			/* worst case, doesn't subtract unused space */
			
 
				+			*nbytes += XFS_IFORK_ASIZE(ip);
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	case XFS_DINODE_FMT_BTREE:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
			
 
				-		    ip->i_afp->if_broot_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_afp->if_broot_bytes > 0) {
			
 
				+			*nbytes += ip->i_afp->if_broot_bytes;
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	case XFS_DINODE_FMT_LOCAL:
			
 
				 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
			
 
				-		    ip->i_afp->if_bytes > 0)
			
 
				-			nvecs++;
			
 
				+		    ip->i_afp->if_bytes > 0) {
			
 
				+			*nbytes += roundup(ip->i_afp->if_bytes, 4);
			
 
				+			*nvecs += 1;
			
 
				+		}
			
 
				 		break;
			
 
				 
			
 
				 	default:
			
 
				 		ASSERT(0);
			
 
				 		break;
			
 
				 	}
			
 
				-
			
 
				-	return nvecs;
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -18,123 +18,13 @@
 
				 #ifndef	__XFS_INODE_ITEM_H__
			
 
				 #define	__XFS_INODE_ITEM_H__
			
 
				 
			
 
				-/*
			
 
				- * This is the structure used to lay out an inode log item in the
			
 
				- * log.  The size of the inline data/extents/b-tree root to be logged
			
 
				- * (if any) is indicated in the ilf_dsize field.  Changes to this structure
			
 
				- * must be added on to the end.
			
 
				- */
			
 
				-typedef struct xfs_inode_log_format {
			
 
				-	__uint16_t		ilf_type;	/* inode log item type */
			
 
				-	__uint16_t		ilf_size;	/* size of this item */
			
 
				-	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				-	__uint64_t		ilf_ino;	/* inode number */
			
 
				-	union {
			
 
				-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				-		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				-	} ilf_u;
			
 
				-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				-	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				-	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				-} xfs_inode_log_format_t;
			
 
				-
			
 
				-typedef struct xfs_inode_log_format_32 {
			
 
				-	__uint16_t		ilf_type;	/* inode log item type */
			
 
				-	__uint16_t		ilf_size;	/* size of this item */
			
 
				-	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				-	__uint64_t		ilf_ino;	/* inode number */
			
 
				-	union {
			
 
				-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				-		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				-	} ilf_u;
			
 
				-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				-	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				-	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				-} __attribute__((packed)) xfs_inode_log_format_32_t;
			
 
				-
			
 
				-typedef struct xfs_inode_log_format_64 {
			
 
				-	__uint16_t		ilf_type;	/* inode log item type */
			
 
				-	__uint16_t		ilf_size;	/* size of this item */
			
 
				-	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				-	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
			
 
				-	__uint64_t		ilf_ino;	/* inode number */
			
 
				-	union {
			
 
				-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				-		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				-	} ilf_u;
			
 
				-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				-	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				-	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				-} xfs_inode_log_format_64_t;
			
 
				-
			
 
				-/*
			
 
				- * Flags for xfs_trans_log_inode flags field.
			
 
				- */
			
 
				-#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
			
 
				-#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_data */
			
 
				-#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
			
 
				-#define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
			
 
				-#define	XFS_ILOG_DEV	0x010	/* log the dev field */
			
 
				-#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
			
 
				-#define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
			
 
				-#define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
			
 
				-#define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * The timestamps are dirty, but not necessarily anything else in the inode
			
 
				- * core.  Unlike the other fields above this one must never make it to disk
			
 
				- * in the ilf_fields of the inode_log_format, but is purely store in-memory in
			
 
				- * ili_fields in the inode_log_item.
			
 
				- */
			
 
				-#define XFS_ILOG_TIMESTAMP	0x4000
			
 
				-
			
 
				-#define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
			
 
				-				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
			
 
				-				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
			
 
				-				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
			
 
				-
			
 
				-#define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
			
 
				-				 XFS_ILOG_DBROOT)
			
 
				-
			
 
				-#define	XFS_ILOG_AFORK		(XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
			
 
				-				 XFS_ILOG_ABROOT)
			
 
				-
			
 
				-#define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
			
 
				-				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
			
 
				-				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
			
 
				-				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
			
 
				-				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
			
 
				-
			
 
				-static inline int xfs_ilog_fbroot(int w)
			
 
				-{
			
 
				-	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
			
 
				-}
			
 
				-
			
 
				-static inline int xfs_ilog_fext(int w)
			
 
				-{
			
 
				-	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
			
 
				-}
			
 
				-
			
 
				-static inline int xfs_ilog_fdata(int w)
			
 
				-{
			
 
				-	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
			
 
				-}
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				+/* kernel only definitions */
			
 
				 
			
 
				 struct xfs_buf;
			
 
				 struct xfs_bmbt_rec;
			
 
				 struct xfs_inode;
			
 
				 struct xfs_mount;
			
 
				 
			
 
				-
			
 
				 typedef struct xfs_inode_log_item {
			
 
				 	xfs_log_item_t		ili_item;	   /* common portion */
			
 
				 	struct xfs_inode	*ili_inode;	   /* inode ptr */
			
@@ -151,7 +41,6 @@ typedef struct xfs_inode_log_item {
 
				 	xfs_inode_log_format_t	ili_format;	   /* logged structure */
			
 
				 } xfs_inode_log_item_t;
			
 
				 
			
 
				-
			
 
				 static inline int xfs_inode_clean(xfs_inode_t *ip)
			
 
				 {
			
 
				 	return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
			
@@ -165,6 +54,6 @@ extern void xfs_iflush_abort(struct xfs_inode *, bool);
 
				 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
			
 
				 					 xfs_inode_log_format_t *);
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				+extern struct kmem_zone	*xfs_ili_zone;
			
 
				 
			
 
				 #endif	/* __XFS_INODE_ITEM_H__ */
			
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
@@ -32,17 +33,16 @@
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				-#include "xfs_utils.h"
			
 
				-#include "xfs_dfrag.h"
			
 
				 #include "xfs_fsops.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_discard.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_export.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_symlink.h"
			
 
				 
			
 
				 #include <linux/capability.h>
			
 
				 #include <linux/dcache.h>
			
@@ -350,6 +350,40 @@ xfs_readlink_by_handle(
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				+int
			
 
				+xfs_set_dmattrs(
			
 
				+	xfs_inode_t     *ip,
			
 
				+	u_int		evmask,
			
 
				+	u_int16_t	state)
			
 
				+{
			
 
				+	xfs_mount_t	*mp = ip->i_mount;
			
 
				+	xfs_trans_t	*tp;
			
 
				+	int		error;
			
 
				+
			
 
				+	if (!capable(CAP_SYS_ADMIN))
			
 
				+		return XFS_ERROR(EPERM);
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				+		return XFS_ERROR(EIO);
			
 
				+
			
 
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
			
 
				+	if (error) {
			
 
				+		xfs_trans_cancel(tp, 0);
			
 
				+		return error;
			
 
				+	}
			
 
				+	xfs_ilock(ip, XFS_ILOCK_EXCL);
			
 
				+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
			
 
				+
			
 
				+	ip->i_d.di_dmevmask = evmask;
			
 
				+	ip->i_d.di_dmstate  = state;
			
 
				+
			
 
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				+	error = xfs_trans_commit(tp, 0);
			
 
				+
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				 STATIC int
			
 
				 xfs_fssetdm_by_handle(
			
 
				 	struct file		*parfilp,
			
@@ -967,7 +1001,7 @@ xfs_ioctl_setattr(
 
				 	 * first do an error checking pass.
			
 
				 	 */
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
			
 
				-	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
			
 
				+	code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
			
 
				 	if (code)
			
 
				 		goto error_return;
			
 
				 
			
@@ -981,15 +1015,22 @@ xfs_ioctl_setattr(
 
				 	 * to the file owner ID, except in cases where the
			
 
				 	 * CAP_FSETID capability is applicable.
			
 
				 	 */
			
 
				-	if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
			
 
				+	if (!inode_owner_or_capable(VFS_I(ip))) {
			
 
				 		code = XFS_ERROR(EPERM);
			
 
				 		goto error_return;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				 	 * Do a quota reservation only if projid is actually going to change.
			
 
				+	 * Only allow changing of projid from init_user_ns since it is a
			
 
				+	 * non user namespace aware identifier.
			
 
				 	 */
			
 
				 	if (mask & FSX_PROJID) {
			
 
				+		if (current_user_ns() != &init_user_ns) {
			
 
				+			code = XFS_ERROR(EINVAL);
			
 
				+			goto error_return;
			
 
				+		}
			
 
				+
			
 
				 		if (XFS_IS_QUOTA_RUNNING(mp) &&
			
 
				 		    XFS_IS_PQUOTA_ON(mp) &&
			
 
				 		    xfs_get_projid(ip) != fa->fsx_projid) {
			
@@ -1103,7 +1144,7 @@ xfs_ioctl_setattr(
 
				 		 * cleared upon successful return from chown()
			
 
				 		 */
			
 
				 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
			
 
				-		    !capable(CAP_FSETID))
			
 
				+		    !inode_capable(VFS_I(ip), CAP_FSETID))
			
 
				 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
			
 
				 
			
 
				 		/*
			
@@ -1328,6 +1369,75 @@ xfs_ioc_getbmapx(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+int
			
 
				+xfs_ioc_swapext(
			
 
				+	xfs_swapext_t	*sxp)
			
 
				+{
			
 
				+	xfs_inode_t     *ip, *tip;
			
 
				+	struct fd	f, tmp;
			
 
				+	int		error = 0;
			
 
				+
			
 
				+	/* Pull information for the target fd */
			
 
				+	f = fdget((int)sxp->sx_fdtarget);
			
 
				+	if (!f.file) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (!(f.file->f_mode & FMODE_WRITE) ||
			
 
				+	    !(f.file->f_mode & FMODE_READ) ||
			
 
				+	    (f.file->f_flags & O_APPEND)) {
			
 
				+		error = XFS_ERROR(EBADF);
			
 
				+		goto out_put_file;
			
 
				+	}
			
 
				+
			
 
				+	tmp = fdget((int)sxp->sx_fdtmp);
			
 
				+	if (!tmp.file) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_put_file;
			
 
				+	}
			
 
				+
			
 
				+	if (!(tmp.file->f_mode & FMODE_WRITE) ||
			
 
				+	    !(tmp.file->f_mode & FMODE_READ) ||
			
 
				+	    (tmp.file->f_flags & O_APPEND)) {
			
 
				+		error = XFS_ERROR(EBADF);
			
 
				+		goto out_put_tmp_file;
			
 
				+	}
			
 
				+
			
 
				+	if (IS_SWAPFILE(file_inode(f.file)) ||
			
 
				+	    IS_SWAPFILE(file_inode(tmp.file))) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_put_tmp_file;
			
 
				+	}
			
 
				+
			
 
				+	ip = XFS_I(file_inode(f.file));
			
 
				+	tip = XFS_I(file_inode(tmp.file));
			
 
				+
			
 
				+	if (ip->i_mount != tip->i_mount) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_put_tmp_file;
			
 
				+	}
			
 
				+
			
 
				+	if (ip->i_ino == tip->i_ino) {
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto out_put_tmp_file;
			
 
				+	}
			
 
				+
			
 
				+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
			
 
				+		error = XFS_ERROR(EIO);
			
 
				+		goto out_put_tmp_file;
			
 
				+	}
			
 
				+
			
 
				+	error = xfs_swap_extents(ip, tip, sxp);
			
 
				+
			
 
				+ out_put_tmp_file:
			
 
				+	fdput(tmp);
			
 
				+ out_put_file:
			
 
				+	fdput(f);
			
 
				+ out:
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Note: some of the ioctl's return positive numbers as a
			
 
				  * byte count indicating success, such as readlink_by_handle.
			
@@ -1472,7 +1582,7 @@ xfs_file_ioctl(
 
				 		error = mnt_want_write_file(filp);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				-		error = xfs_swapext(&sxp);
			
 
				+		error = xfs_ioc_swapext(&sxp);
			
 
				 		mnt_drop_write_file(filp);
			
 
				 		return -error;
			
 
				 	}
			
@@ -1610,23 +1720,23 @@ xfs_file_ioctl(
 
				 		return -error;
			
 
				 
			
 
				 	case XFS_IOC_FREE_EOFBLOCKS: {
			
 
				-		struct xfs_eofblocks eofb;
			
 
				+		struct xfs_fs_eofblocks eofb;
			
 
				+		struct xfs_eofblocks keofb;
			
 
				 
			
 
				-		if (copy_from_user(&eofb, arg, sizeof(eofb)))
			
 
				-			return -XFS_ERROR(EFAULT);
			
 
				+		if (!capable(CAP_SYS_ADMIN))
			
 
				+			return -EPERM;
			
 
				 
			
 
				-		if (eofb.eof_version != XFS_EOFBLOCKS_VERSION)
			
 
				-			return -XFS_ERROR(EINVAL);
			
 
				+		if (mp->m_flags & XFS_MOUNT_RDONLY)
			
 
				+			return -XFS_ERROR(EROFS);
			
 
				 
			
 
				-		if (eofb.eof_flags & ~XFS_EOF_FLAGS_VALID)
			
 
				-			return -XFS_ERROR(EINVAL);
			
 
				+		if (copy_from_user(&eofb, arg, sizeof(eofb)))
			
 
				+			return -XFS_ERROR(EFAULT);
			
 
				 
			
 
				-		if (memchr_inv(&eofb.pad32, 0, sizeof(eofb.pad32)) ||
			
 
				-		    memchr_inv(eofb.pad64, 0, sizeof(eofb.pad64)))
			
 
				-			return -XFS_ERROR(EINVAL);
			
 
				+		error = xfs_fs_eofblocks_from_user(&eofb, &keofb);
			
 
				+		if (error)
			
 
				+			return -error;
			
 
				 
			
 
				-		error = xfs_icache_free_eofblocks(mp, &eofb);
			
 
				-		return -error;
			
 
				+		return -xfs_icache_free_eofblocks(mp, &keofb);
			
 
				 	}
			
 
				 
			
 
				 	default:
			
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -27,6 +27,10 @@ xfs_ioc_space(
 
				 	unsigned int		cmd,
			
 
				 	xfs_flock64_t		*bf);
			
 
				 
			
 
				+int
			
 
				+xfs_ioc_swapext(
			
 
				+	xfs_swapext_t	*sxp);
			
 
				+
			
 
				 extern int
			
 
				 xfs_find_handle(
			
 
				 	unsigned int		cmd,
			
@@ -82,4 +86,10 @@ xfs_file_compat_ioctl(
 
				 	unsigned int		cmd,
			
 
				 	unsigned long		arg);
			
 
				 
			
 
				+extern int
			
 
				+xfs_set_dmattrs(
			
 
				+	struct xfs_inode	*ip,
			
 
				+	u_int			evmask,
			
 
				+	u_int16_t		state);
			
 
				+
			
 
				 #endif
			
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -33,8 +33,6 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_itable.h"
			
 
				 #include "xfs_error.h"
			
 
				-#include "xfs_dfrag.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_fsops.h"
			
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_rtalloc.h"
			
@@ -644,7 +642,7 @@ xfs_file_compat_ioctl(
 
				 		error = mnt_want_write_file(filp);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				-		error = xfs_swapext(&sxp);
			
 
				+		error = xfs_ioc_swapext(&sxp);
			
 
				 		mnt_drop_write_file(filp);
			
 
				 		return -error;
			
 
				 	}
			
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
@@ -32,13 +33,13 @@
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_btree.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_itable.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				 #include "xfs_trans_space.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_iomap.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
@@ -187,10 +188,8 @@ xfs_iomap_write_direct(
 
				 	 * Allocate and setup the transaction
			
 
				 	 */
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
			
 
				-	error = xfs_trans_reserve(tp, resblks,
			
 
				-			XFS_WRITE_LOG_RES(mp), resrtextents,
			
 
				-			XFS_TRANS_PERM_LOG_RES,
			
 
				-			XFS_WRITE_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
			
 
				+				  resblks, resrtextents);
			
 
				 	/*
			
 
				 	 * Check for running out of space, note: need lock to return
			
 
				 	 */
			
@@ -698,10 +697,8 @@ xfs_iomap_write_allocate(
 
				 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
			
 
				 			tp->t_flags |= XFS_TRANS_RESERVE;
			
 
				 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
			
 
				-			error = xfs_trans_reserve(tp, nres,
			
 
				-					XFS_WRITE_LOG_RES(mp),
			
 
				-					0, XFS_TRANS_PERM_LOG_RES,
			
 
				-					XFS_WRITE_LOG_COUNT);
			
 
				+			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
			
 
				+						  nres, 0);
			
 
				 			if (error) {
			
 
				 				xfs_trans_cancel(tp, 0);
			
 
				 				return XFS_ERROR(error);
			
@@ -864,10 +861,8 @@ xfs_iomap_write_unwritten(
 
				 		sb_start_intwrite(mp->m_super);
			
 
				 		tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
			
 
				 		tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
			
 
				-		error = xfs_trans_reserve(tp, resblks,
			
 
				-				XFS_WRITE_LOG_RES(mp), 0,
			
 
				-				XFS_TRANS_PERM_LOG_RES,
			
 
				-				XFS_WRITE_LOG_COUNT);
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
			
 
				+					  resblks, 0);
			
 
				 		if (error) {
			
 
				 			xfs_trans_cancel(tp, 0);
			
 
				 			return XFS_ERROR(error);
			
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_acl.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
@@ -29,16 +30,19 @@
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_itable.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				-#include "xfs_utils.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_symlink.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2_priv.h"
			
 
				 
			
 
				 #include <linux/capability.h>
			
 
				 #include <linux/xattr.h>
			
@@ -87,10 +91,12 @@ xfs_init_security(
 
				 static void
			
 
				 xfs_dentry_to_name(
			
 
				 	struct xfs_name	*namep,
			
 
				-	struct dentry	*dentry)
			
 
				+	struct dentry	*dentry,
			
 
				+	int		mode)
			
 
				 {
			
 
				 	namep->name = dentry->d_name.name;
			
 
				 	namep->len = dentry->d_name.len;
			
 
				+	namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT];
			
 
				 }
			
 
				 
			
 
				 STATIC void
			
@@ -106,7 +112,7 @@ xfs_cleanup_inode(
 
				 	 * xfs_init_security we must back out.
			
 
				 	 * ENOSPC can hit here, among other things.
			
 
				 	 */
			
 
				-	xfs_dentry_to_name(&teardown, dentry);
			
 
				+	xfs_dentry_to_name(&teardown, dentry, 0);
			
 
				 
			
 
				 	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
			
 
				 	iput(inode);
			
@@ -146,7 +152,7 @@ xfs_vn_mknod(
 
				 			mode &= ~current_umask();
			
 
				 	}
			
 
				 
			
 
				-	xfs_dentry_to_name(&name, dentry);
			
 
				+	xfs_dentry_to_name(&name, dentry, mode);
			
 
				 	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
			
 
				 	if (unlikely(error))
			
 
				 		goto out_free_acl;
			
@@ -207,7 +213,7 @@ xfs_vn_lookup(
 
				 	if (dentry->d_name.len >= MAXNAMELEN)
			
 
				 		return ERR_PTR(-ENAMETOOLONG);
			
 
				 
			
 
				-	xfs_dentry_to_name(&name, dentry);
			
 
				+	xfs_dentry_to_name(&name, dentry, 0);
			
 
				 	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
			
 
				 	if (unlikely(error)) {
			
 
				 		if (unlikely(error != ENOENT))
			
@@ -234,7 +240,7 @@ xfs_vn_ci_lookup(
 
				 	if (dentry->d_name.len >= MAXNAMELEN)
			
 
				 		return ERR_PTR(-ENAMETOOLONG);
			
 
				 
			
 
				-	xfs_dentry_to_name(&xname, dentry);
			
 
				+	xfs_dentry_to_name(&xname, dentry, 0);
			
 
				 	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
			
 
				 	if (unlikely(error)) {
			
 
				 		if (unlikely(error != ENOENT))
			
@@ -269,7 +275,7 @@ xfs_vn_link(
 
				 	struct xfs_name	name;
			
 
				 	int		error;
			
 
				 
			
 
				-	xfs_dentry_to_name(&name, dentry);
			
 
				+	xfs_dentry_to_name(&name, dentry, inode->i_mode);
			
 
				 
			
 
				 	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
			
 
				 	if (unlikely(error))
			
@@ -288,7 +294,7 @@ xfs_vn_unlink(
 
				 	struct xfs_name	name;
			
 
				 	int		error;
			
 
				 
			
 
				-	xfs_dentry_to_name(&name, dentry);
			
 
				+	xfs_dentry_to_name(&name, dentry, 0);
			
 
				 
			
 
				 	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
			
 
				 	if (error)
			
@@ -318,7 +324,7 @@ xfs_vn_symlink(
 
				 
			
 
				 	mode = S_IFLNK |
			
 
				 		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
			
 
				-	xfs_dentry_to_name(&name, dentry);
			
 
				+	xfs_dentry_to_name(&name, dentry, mode);
			
 
				 
			
 
				 	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
			
 
				 	if (unlikely(error))
			
@@ -350,12 +356,12 @@ xfs_vn_rename(
 
				 	struct xfs_name	oname;
			
 
				 	struct xfs_name	nname;
			
 
				 
			
 
				-	xfs_dentry_to_name(&oname, odentry);
			
 
				-	xfs_dentry_to_name(&nname, ndentry);
			
 
				+	xfs_dentry_to_name(&oname, odentry, 0);
			
 
				+	xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
			
 
				 
			
 
				 	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
			
 
				 			   XFS_I(ndir), &nname, new_inode ?
			
 
				-			   			XFS_I(new_inode) : NULL);
			
 
				+						XFS_I(new_inode) : NULL);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -420,8 +426,8 @@ xfs_vn_getattr(
 
				 	stat->dev = inode->i_sb->s_dev;
			
 
				 	stat->mode = ip->i_d.di_mode;
			
 
				 	stat->nlink = ip->i_d.di_nlink;
			
 
				-	stat->uid = ip->i_d.di_uid;
			
 
				-	stat->gid = ip->i_d.di_gid;
			
 
				+	stat->uid = inode->i_uid;
			
 
				+	stat->gid = inode->i_gid;
			
 
				 	stat->ino = ip->i_ino;
			
 
				 	stat->atime = inode->i_atime;
			
 
				 	stat->mtime = inode->i_mtime;
			
@@ -485,8 +491,8 @@ xfs_setattr_nonsize(
 
				 	int			mask = iattr->ia_valid;
			
 
				 	xfs_trans_t		*tp;
			
 
				 	int			error;
			
 
				-	uid_t			uid = 0, iuid = 0;
			
 
				-	gid_t			gid = 0, igid = 0;
			
 
				+	kuid_t			uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID;
			
 
				+	kgid_t			gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID;
			
 
				 	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
			
 
				 	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;
			
 
				 
			
@@ -522,13 +528,13 @@ xfs_setattr_nonsize(
 
				 			uid = iattr->ia_uid;
			
 
				 			qflags |= XFS_QMOPT_UQUOTA;
			
 
				 		} else {
			
 
				-			uid = ip->i_d.di_uid;
			
 
				+			uid = inode->i_uid;
			
 
				 		}
			
 
				 		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
			
 
				 			gid = iattr->ia_gid;
			
 
				 			qflags |= XFS_QMOPT_GQUOTA;
			
 
				 		}  else {
			
 
				-			gid = ip->i_d.di_gid;
			
 
				+			gid = inode->i_gid;
			
 
				 		}
			
 
				 
			
 
				 		/*
			
@@ -538,14 +544,16 @@ xfs_setattr_nonsize(
 
				 		 */
			
 
				 		ASSERT(udqp == NULL);
			
 
				 		ASSERT(gdqp == NULL);
			
 
				-		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
			
 
				-					 qflags, &udqp, &gdqp, NULL);
			
 
				+		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
			
 
				+					   xfs_kgid_to_gid(gid),
			
 
				+					   xfs_get_projid(ip),
			
 
				+					   qflags, &udqp, &gdqp, NULL);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				 	}
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
			
 
				 	if (error)
			
 
				 		goto out_dqrele;
			
 
				 
			
@@ -561,8 +569,8 @@ xfs_setattr_nonsize(
 
				 		 * while we didn't have the inode locked, inode's dquot(s)
			
 
				 		 * would have changed also.
			
 
				 		 */
			
 
				-		iuid = ip->i_d.di_uid;
			
 
				-		igid = ip->i_d.di_gid;
			
 
				+		iuid = inode->i_uid;
			
 
				+		igid = inode->i_gid;
			
 
				 		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
			
 
				 		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
			
 
				 
			
@@ -571,8 +579,8 @@ xfs_setattr_nonsize(
 
				 		 * going to change.
			
 
				 		 */
			
 
				 		if (XFS_IS_QUOTA_RUNNING(mp) &&
			
 
				-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
			
 
				-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
			
 
				+		    ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) ||
			
 
				+		     (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) {
			
 
				 			ASSERT(tp);
			
 
				 			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
			
 
				 						NULL, capable(CAP_FOWNER) ?
			
@@ -602,17 +610,17 @@ xfs_setattr_nonsize(
 
				 		 * Change the ownerships and register quota modifications
			
 
				 		 * in the transaction.
			
 
				 		 */
			
 
				-		if (iuid != uid) {
			
 
				+		if (!uid_eq(iuid, uid)) {
			
 
				 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
			
 
				 				ASSERT(mask & ATTR_UID);
			
 
				 				ASSERT(udqp);
			
 
				 				olddquot1 = xfs_qm_vop_chown(tp, ip,
			
 
				 							&ip->i_udquot, udqp);
			
 
				 			}
			
 
				-			ip->i_d.di_uid = uid;
			
 
				+			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
			
 
				 			inode->i_uid = uid;
			
 
				 		}
			
 
				-		if (igid != gid) {
			
 
				+		if (!gid_eq(igid, gid)) {
			
 
				 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
			
 
				 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
			
 
				 				ASSERT(mask & ATTR_GID);
			
@@ -620,7 +628,7 @@ xfs_setattr_nonsize(
 
				 				olddquot2 = xfs_qm_vop_chown(tp, ip,
			
 
				 							&ip->i_gdquot, gdqp);
			
 
				 			}
			
 
				-			ip->i_d.di_gid = gid;
			
 
				+			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
			
 
				 			inode->i_gid = gid;
			
 
				 		}
			
 
				 	}
			
@@ -807,9 +815,7 @@ xfs_setattr_size(
 
				 		goto out_unlock;
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			
 
				-				 XFS_TRANS_PERM_LOG_RES,
			
 
				-				 XFS_ITRUNCATE_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
			
 
				 	if (error)
			
 
				 		goto out_trans_cancel;
			
 
				 
			
@@ -932,7 +938,7 @@ xfs_vn_update_time(
 
				 	trace_xfs_update_time(ip);
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return -error;
			
@@ -1173,8 +1179,8 @@ xfs_setup_inode(
 
				 
			
 
				 	inode->i_mode	= ip->i_d.di_mode;
			
 
				 	set_nlink(inode, ip->i_d.di_nlink);
			
 
				-	inode->i_uid	= ip->i_d.di_uid;
			
 
				-	inode->i_gid	= ip->i_d.di_gid;
			
 
				+	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
			
 
				+	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
			
 
				 
			
 
				 	switch (inode->i_mode & S_IFMT) {
			
 
				 	case S_IFBLK:
			
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -27,4 +27,17 @@ extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
				 
			
 
				 extern void xfs_setup_inode(struct xfs_inode *);
			
 
				 
			
 
				+/*
			
 
				+ * Internal setattr interfaces.
			
 
				+ */
			
 
				+#define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
			
 
				+#define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if op would block */
			
 
				+#define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
			
 
				+#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
			
 
				+#define XFS_ATTR_SYNC		0x10	/* synchronous operation required */
			
 
				+
			
 
				+extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
			
 
				+			       int flags);
			
 
				+extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
			
 
				+
			
 
				 #endif /* __XFS_IOPS_H__ */
			
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -32,6 +32,38 @@
 
				 # define XFS_BIG_INUMS	0
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * Kernel specific type declarations for XFS
			
 
				+ */
			
 
				+typedef signed char		__int8_t;
			
 
				+typedef unsigned char		__uint8_t;
			
 
				+typedef signed short int	__int16_t;
			
 
				+typedef unsigned short int	__uint16_t;
			
 
				+typedef signed int		__int32_t;
			
 
				+typedef unsigned int		__uint32_t;
			
 
				+typedef signed long long int	__int64_t;
			
 
				+typedef unsigned long long int	__uint64_t;
			
 
				+
			
 
				+typedef __uint32_t		inst_t;		/* an instruction */
			
 
				+
			
 
				+typedef __s64			xfs_off_t;	/* <file offset> type */
			
 
				+typedef unsigned long long	xfs_ino_t;	/* <inode> type */
			
 
				+typedef __s64			xfs_daddr_t;	/* <disk address> type */
			
 
				+typedef char *			xfs_caddr_t;	/* <core address> type */
			
 
				+typedef __u32			xfs_dev_t;
			
 
				+typedef __u32			xfs_nlink_t;
			
 
				+
			
 
				+/* __psint_t is the same size as a pointer */
			
 
				+#if (BITS_PER_LONG == 32)
			
 
				+typedef __int32_t __psint_t;
			
 
				+typedef __uint32_t __psunsigned_t;
			
 
				+#elif (BITS_PER_LONG == 64)
			
 
				+typedef __int64_t __psint_t;
			
 
				+typedef __uint64_t __psunsigned_t;
			
 
				+#else
			
 
				+#error BITS_PER_LONG must be 32 or 64
			
 
				+#endif
			
 
				+
			
 
				 #include "xfs_types.h"
			
 
				 
			
 
				 #include "kmem.h"
			
@@ -114,8 +146,6 @@
 
				 #define xfs_inherit_sync	xfs_params.inherit_sync.val
			
 
				 #define xfs_inherit_nodump	xfs_params.inherit_nodump.val
			
 
				 #define xfs_inherit_noatime	xfs_params.inherit_noatim.val
			
 
				-#define xfs_buf_timer_centisecs	xfs_params.xfs_buf_timer.val
			
 
				-#define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
			
 
				 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
			
 
				 #define xfs_rotorstep		xfs_params.rotorstep.val
			
 
				 #define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
			
@@ -159,6 +189,32 @@
 
				 #define MAX(a,b)	(max(a,b))
			
 
				 #define howmany(x, y)	(((x)+((y)-1))/(y))
			
 
				 
			
 
				+/* Kernel uid/gid conversion. These are used to convert to/from the on disk
			
 
				+ * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
			
 
				+ * The conversion here is type only, the value will remain the same since we
			
 
				+ * are converting to the init_user_ns. The uid is later mapped to a particular
			
 
				+ * user namespace value when crossing the kernel/user boundary.
			
 
				+ */
			
 
				+static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
			
 
				+{
			
 
				+	return from_kuid(&init_user_ns, uid);
			
 
				+}
			
 
				+
			
 
				+static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
			
 
				+{
			
 
				+	return make_kuid(&init_user_ns, uid);
			
 
				+}
			
 
				+
			
 
				+static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
			
 
				+{
			
 
				+	return from_kgid(&init_user_ns, gid);
			
 
				+}
			
 
				+
			
 
				+static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
			
 
				+{
			
 
				+	return make_kgid(&init_user_ns, gid);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Various platform dependent calls that don't fit anywhere else
			
 
				  */
			
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -614,7 +614,8 @@ xfs_log_mount(
 
				 	xfs_daddr_t	blk_offset,
			
 
				 	int		num_bblks)
			
 
				 {
			
 
				-	int		error;
			
 
				+	int		error = 0;
			
 
				+	int		min_logfsbs;
			
 
				 
			
 
				 	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
			
 
				 		xfs_notice(mp, "Mounting Filesystem");
			
@@ -630,6 +631,50 @@ xfs_log_mount(
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * Validate the given log space and drop a critical message via syslog
			
 
				+	 * if the log size is too small that would lead to some unexpected
			
 
				+	 * situations in transaction log space reservation stage.
			
 
				+	 *
			
 
				+	 * Note: we can't just reject the mount if the validation fails.  This
			
 
				+	 * would mean that people would have to downgrade their kernel just to
			
 
				+	 * remedy the situation as there is no way to grow the log (short of
			
 
				+	 * black magic surgery with xfs_db).
			
 
				+	 *
			
 
				+	 * We can, however, reject mounts for CRC format filesystems, as the
			
 
				+	 * mkfs binary being used to make the filesystem should never create a
			
 
				+	 * filesystem with a log that is too small.
			
 
				+	 */
			
 
				+	min_logfsbs = xfs_log_calc_minimum_size(mp);
			
 
				+
			
 
				+	if (mp->m_sb.sb_logblocks < min_logfsbs) {
			
 
				+		xfs_warn(mp,
			
 
				+		"Log size %d blocks too small, minimum size is %d blocks",
			
 
				+			 mp->m_sb.sb_logblocks, min_logfsbs);
			
 
				+		error = EINVAL;
			
 
				+	} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
			
 
				+		xfs_warn(mp,
			
 
				+		"Log size %d blocks too large, maximum size is %lld blocks",
			
 
				+			 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
			
 
				+		error = EINVAL;
			
 
				+	} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
			
 
				+		xfs_warn(mp,
			
 
				+		"log size %lld bytes too large, maximum size is %lld bytes",
			
 
				+			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
			
 
				+			 XFS_MAX_LOG_BYTES);
			
 
				+		error = EINVAL;
			
 
				+	}
			
 
				+	if (error) {
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
			
 
				+			ASSERT(0);
			
 
				+			goto out_free_log;
			
 
				+		}
			
 
				+		xfs_crit(mp,
			
 
				+"Log size out of supported range. Continuing onwards, but if log hangs are\n"
			
 
				+"experienced then please report this message in the bug report.");
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * Initialize the AIL now we have a log.
			
 
				 	 */
			
@@ -720,7 +765,7 @@ xfs_log_mount_finish(xfs_mount_t *mp)
 
				  * Unmount record used to have a string "Unmount filesystem--" in the
			
 
				  * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
			
 
				  * We just write the magic number now since that particular field isn't
			
 
				- * currently architecture converted and "nUmount" is a bit foo.
			
 
				+ * currently architecture converted and "Unmount" is a bit foo.
			
 
				  * As far as I know, there weren't any dependencies on the old behaviour.
			
 
				  */
			
 
				 
			
@@ -1941,7 +1986,7 @@ xlog_print_tic_res(
 
				 
			
 
				 	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
			
 
				 		"xlog_write: reservation ran out. Need to up reservation");
			
 
				-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			
 
				+	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2044,7 +2089,7 @@ xlog_write_setup_ophdr(
 
				  * Set up the parameters of the region copy into the log. This has
			
 
				  * to handle region write split across multiple log buffers - this
			
 
				  * state is kept external to this function so that this code can
			
 
				- * can be written in an obvious, self documenting manner.
			
 
				+ * be written in an obvious, self documenting manner.
			
 
				  */
			
 
				 static int
			
 
				 xlog_write_setup_copy(
			
@@ -3391,24 +3436,17 @@ xfs_log_ticket_get(
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Allocate and initialise a new log ticket.
			
 
				+ * Figure out the total log space unit (in bytes) that would be
			
 
				+ * required for a log ticket.
			
 
				  */
			
 
				-struct xlog_ticket *
			
 
				-xlog_ticket_alloc(
			
 
				-	struct xlog	*log,
			
 
				-	int		unit_bytes,
			
 
				-	int		cnt,
			
 
				-	char		client,
			
 
				-	bool		permanent,
			
 
				-	xfs_km_flags_t	alloc_flags)
			
 
				+int
			
 
				+xfs_log_calc_unit_res(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	int			unit_bytes)
			
 
				 {
			
 
				-	struct xlog_ticket *tic;
			
 
				-	uint		num_headers;
			
 
				-	int		iclog_space;
			
 
				-
			
 
				-	tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
			
 
				-	if (!tic)
			
 
				-		return NULL;
			
 
				+	struct xlog		*log = mp->m_log;
			
 
				+	int			iclog_space;
			
 
				+	uint			num_headers;
			
 
				 
			
 
				 	/*
			
 
				 	 * Permanent reservations have up to 'cnt'-1 active log operations
			
@@ -3483,20 +3521,43 @@ xlog_ticket_alloc(
 
				 	unit_bytes += log->l_iclog_hsize;
			
 
				 
			
 
				 	/* for roundoff padding for transaction data and one for commit record */
			
 
				-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
			
 
				-	    log->l_mp->m_sb.sb_logsunit > 1) {
			
 
				+	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
			
 
				 		/* log su roundoff */
			
 
				-		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
			
 
				+		unit_bytes += 2 * mp->m_sb.sb_logsunit;
			
 
				 	} else {
			
 
				 		/* BB roundoff */
			
 
				-		unit_bytes += 2*BBSIZE;
			
 
				+		unit_bytes += 2 * BBSIZE;
			
 
				         }
			
 
				 
			
 
				+	return unit_bytes;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Allocate and initialise a new log ticket.
			
 
				+ */
			
 
				+struct xlog_ticket *
			
 
				+xlog_ticket_alloc(
			
 
				+	struct xlog		*log,
			
 
				+	int			unit_bytes,
			
 
				+	int			cnt,
			
 
				+	char			client,
			
 
				+	bool			permanent,
			
 
				+	xfs_km_flags_t		alloc_flags)
			
 
				+{
			
 
				+	struct xlog_ticket	*tic;
			
 
				+	int			unit_res;
			
 
				+
			
 
				+	tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
			
 
				+	if (!tic)
			
 
				+		return NULL;
			
 
				+
			
 
				+	unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
			
 
				+
			
 
				 	atomic_set(&tic->t_ref, 1);
			
 
				 	tic->t_task		= current;
			
 
				 	INIT_LIST_HEAD(&tic->t_queue);
			
 
				-	tic->t_unit_res		= unit_bytes;
			
 
				-	tic->t_curr_res		= unit_bytes;
			
 
				+	tic->t_unit_res		= unit_res;
			
 
				+	tic->t_curr_res		= unit_res;
			
 
				 	tic->t_cnt		= cnt;
			
 
				 	tic->t_ocnt		= cnt;
			
 
				 	tic->t_tid		= prandom_u32();
			
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -18,14 +18,30 @@
 
				 #ifndef	__XFS_LOG_H__
			
 
				 #define __XFS_LOG_H__
			
 
				 
			
 
				-/* get lsn fields */
			
 
				-#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
			
 
				-#define BLOCK_LSN(lsn) ((uint)(lsn))
			
 
				+#include "xfs_log_format.h"
			
 
				 
			
 
				-/* this is used in a spot where we might otherwise double-endian-flip */
			
 
				-#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
			
 
				+struct xfs_log_vec {
			
 
				+	struct xfs_log_vec	*lv_next;	/* next lv in build list */
			
 
				+	int			lv_niovecs;	/* number of iovecs in lv */
			
 
				+	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
			
 
				+	struct xfs_log_item	*lv_item;	/* owner */
			
 
				+	char			*lv_buf;	/* formatted buffer */
			
 
				+	int			lv_buf_len;	/* size of formatted buffer */
			
 
				+	int			lv_size;	/* size of allocated lv */
			
 
				+};
			
 
				+
			
 
				+#define XFS_LOG_VEC_ORDERED	(-1)
			
 
				+
			
 
				+/*
			
 
				+ * Structure used to pass callback function and the function's argument
			
 
				+ * to the log manager.
			
 
				+ */
			
 
				+typedef struct xfs_log_callback {
			
 
				+	struct xfs_log_callback	*cb_next;
			
 
				+	void			(*cb_func)(void *, int);
			
 
				+	void			*cb_arg;
			
 
				+} xfs_log_callback_t;
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				 /*
			
 
				  * By comparing each component, we don't have to worry about extra
			
 
				  * endian issues in treating two 32 bit numbers as one 64 bit number
			
@@ -59,67 +75,6 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 
				  */
			
 
				 #define XFS_LOG_SYNC		0x1
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				-
			
 
				-/* Log Clients */
			
 
				-#define XFS_TRANSACTION		0x69
			
 
				-#define XFS_VOLUME		0x2
			
 
				-#define XFS_LOG			0xaa
			
 
				-
			
 
				-
			
 
				-/* Region types for iovec's i_type */
			
 
				-#define XLOG_REG_TYPE_BFORMAT		1
			
 
				-#define XLOG_REG_TYPE_BCHUNK		2
			
 
				-#define XLOG_REG_TYPE_EFI_FORMAT	3
			
 
				-#define XLOG_REG_TYPE_EFD_FORMAT	4
			
 
				-#define XLOG_REG_TYPE_IFORMAT		5
			
 
				-#define XLOG_REG_TYPE_ICORE		6
			
 
				-#define XLOG_REG_TYPE_IEXT		7
			
 
				-#define XLOG_REG_TYPE_IBROOT		8
			
 
				-#define XLOG_REG_TYPE_ILOCAL		9
			
 
				-#define XLOG_REG_TYPE_IATTR_EXT		10
			
 
				-#define XLOG_REG_TYPE_IATTR_BROOT	11
			
 
				-#define XLOG_REG_TYPE_IATTR_LOCAL	12
			
 
				-#define XLOG_REG_TYPE_QFORMAT		13
			
 
				-#define XLOG_REG_TYPE_DQUOT		14
			
 
				-#define XLOG_REG_TYPE_QUOTAOFF		15
			
 
				-#define XLOG_REG_TYPE_LRHEADER		16
			
 
				-#define XLOG_REG_TYPE_UNMOUNT		17
			
 
				-#define XLOG_REG_TYPE_COMMIT		18
			
 
				-#define XLOG_REG_TYPE_TRANSHDR		19
			
 
				-#define XLOG_REG_TYPE_ICREATE		20
			
 
				-#define XLOG_REG_TYPE_MAX		20
			
 
				-
			
 
				-typedef struct xfs_log_iovec {
			
 
				-	void		*i_addr;	/* beginning address of region */
			
 
				-	int		i_len;		/* length in bytes of region */
			
 
				-	uint		i_type;		/* type of region */
			
 
				-} xfs_log_iovec_t;
			
 
				-
			
 
				-struct xfs_log_vec {
			
 
				-	struct xfs_log_vec	*lv_next;	/* next lv in build list */
			
 
				-	int			lv_niovecs;	/* number of iovecs in lv */
			
 
				-	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
			
 
				-	struct xfs_log_item	*lv_item;	/* owner */
			
 
				-	char			*lv_buf;	/* formatted buffer */
			
 
				-	int			lv_buf_len;	/* size of formatted buffer */
			
 
				-};
			
 
				-
			
 
				-#define XFS_LOG_VEC_ORDERED	(-1)
			
 
				-
			
 
				-/*
			
 
				- * Structure used to pass callback function and the function's argument
			
 
				- * to the log manager.
			
 
				- */
			
 
				-typedef struct xfs_log_callback {
			
 
				-	struct xfs_log_callback	*cb_next;
			
 
				-	void			(*cb_func)(void *, int);
			
 
				-	void			*cb_arg;
			
 
				-} xfs_log_callback_t;
			
 
				-
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				 /* Log manager interfaces */
			
 
				 struct xfs_mount;
			
 
				 struct xlog_in_core;
			
@@ -188,5 +143,4 @@ void	xfs_log_work_queue(struct xfs_mount *mp);
 
				 void	xfs_log_worker(struct work_struct *work);
			
 
				 void	xfs_log_quiesce(struct xfs_mount *mp);
			
 
				 
			
 
				-#endif
			
 
				 #endif	/* __XFS_LOG_H__ */
			
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -80,6 +80,83 @@ xlog_cil_init_post_recovery(
 
				 								log->l_curr_block);
			
 
				 }
			
 
				 
			
 
				+STATIC int
			
 
				+xlog_cil_lv_item_format(
			
 
				+	struct xfs_log_item	*lip,
			
 
				+	struct xfs_log_vec	*lv)
			
 
				+{
			
 
				+	int	index;
			
 
				+	char	*ptr;
			
 
				+
			
 
				+	/* format new vectors into array */
			
 
				+	lip->li_ops->iop_format(lip, lv->lv_iovecp);
			
 
				+
			
 
				+	/* copy data into existing array */
			
 
				+	ptr = lv->lv_buf;
			
 
				+	for (index = 0; index < lv->lv_niovecs; index++) {
			
 
				+		struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
			
 
				+
			
 
				+		memcpy(ptr, vec->i_addr, vec->i_len);
			
 
				+		vec->i_addr = ptr;
			
 
				+		ptr += vec->i_len;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * some size calculations for log vectors over-estimate, so the caller
			
 
				+	 * doesn't know the amount of space actually used by the item. Return
			
 
				+	 * the byte count to the caller so they can check and store it
			
 
				+	 * appropriately.
			
 
				+	 */
			
 
				+	return ptr - lv->lv_buf;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Prepare the log item for insertion into the CIL. Calculate the difference in
			
 
				+ * log space and vectors it will consume, and if it is a new item pin it as
			
 
				+ * well.
			
 
				+ */
			
 
				+STATIC void
			
 
				+xfs_cil_prepare_item(
			
 
				+	struct xlog		*log,
			
 
				+	struct xfs_log_vec	*lv,
			
 
				+	struct xfs_log_vec	*old_lv,
			
 
				+	int			*diff_len,
			
 
				+	int			*diff_iovecs)
			
 
				+{
			
 
				+	/* Account for the new LV being passed in */
			
 
				+	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
			
 
				+		*diff_len += lv->lv_buf_len;
			
 
				+		*diff_iovecs += lv->lv_niovecs;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * If there is no old LV, this is the first time we've seen the item in
			
 
				+	 * this CIL context and so we need to pin it. If we are replacing the
			
 
				+	 * old_lv, then remove the space it accounts for and free it.
			
 
				+	 */
			
 
				+	if (!old_lv)
			
 
				+		lv->lv_item->li_ops->iop_pin(lv->lv_item);
			
 
				+	else if (old_lv != lv) {
			
 
				+		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
			
 
				+
			
 
				+		*diff_len -= old_lv->lv_buf_len;
			
 
				+		*diff_iovecs -= old_lv->lv_niovecs;
			
 
				+		kmem_free(old_lv);
			
 
				+	}
			
 
				+
			
 
				+	/* attach new log vector to log item */
			
 
				+	lv->lv_item->li_lv = lv;
			
 
				+
			
 
				+	/*
			
 
				+	 * If this is the first time the item is being committed to the
			
 
				+	 * CIL, store the sequence number on the log item so we can
			
 
				+	 * tell in future commits whether this is the first checkpoint
			
 
				+	 * the item is being committed into.
			
 
				+	 */
			
 
				+	if (!lv->lv_item->li_seq)
			
 
				+		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Format log item into a flat buffers
			
 
				  *
			
@@ -106,35 +183,39 @@ xlog_cil_init_post_recovery(
 
				  * format the regions into the iclog as though they are being formatted
			
 
				  * directly out of the objects themselves.
			
 
				  */
			
 
				-static struct xfs_log_vec *
			
 
				-xlog_cil_prepare_log_vecs(
			
 
				-	struct xfs_trans	*tp)
			
 
				+static void
			
 
				+xlog_cil_insert_format_items(
			
 
				+	struct xlog		*log,
			
 
				+	struct xfs_trans	*tp,
			
 
				+	int			*diff_len,
			
 
				+	int			*diff_iovecs)
			
 
				 {
			
 
				 	struct xfs_log_item_desc *lidp;
			
 
				-	struct xfs_log_vec	*lv = NULL;
			
 
				-	struct xfs_log_vec	*ret_lv = NULL;
			
 
				 
			
 
				 
			
 
				 	/* Bail out if we didn't find a log item.  */
			
 
				 	if (list_empty(&tp->t_items)) {
			
 
				 		ASSERT(0);
			
 
				-		return NULL;
			
 
				+		return;
			
 
				 	}
			
 
				 
			
 
				 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
			
 
				-		struct xfs_log_vec *new_lv;
			
 
				-		void	*ptr;
			
 
				-		int	index;
			
 
				-		int	len = 0;
			
 
				-		uint	niovecs;
			
 
				+		struct xfs_log_item *lip = lidp->lid_item;
			
 
				+		struct xfs_log_vec *lv;
			
 
				+		struct xfs_log_vec *old_lv;
			
 
				+		int	niovecs = 0;
			
 
				+		int	nbytes = 0;
			
 
				+		int	buf_size;
			
 
				 		bool	ordered = false;
			
 
				 
			
 
				 		/* Skip items which aren't dirty in this transaction. */
			
 
				 		if (!(lidp->lid_flags & XFS_LID_DIRTY))
			
 
				 			continue;
			
 
				 
			
 
				+		/* get number of vecs and size of data to be stored */
			
 
				+		lip->li_ops->iop_size(lip, &niovecs, &nbytes);
			
 
				+
			
 
				 		/* Skip items that do not have any vectors for writing */
			
 
				-		niovecs = IOP_SIZE(lidp->lid_item);
			
 
				 		if (!niovecs)
			
 
				 			continue;
			
 
				 
			
@@ -146,109 +227,63 @@ xlog_cil_prepare_log_vecs(
 
				 		if (niovecs == XFS_LOG_VEC_ORDERED) {
			
 
				 			ordered = true;
			
 
				 			niovecs = 0;
			
 
				+			nbytes = 0;
			
 
				 		}
			
 
				 
			
 
				-		new_lv = kmem_zalloc(sizeof(*new_lv) +
			
 
				-				niovecs * sizeof(struct xfs_log_iovec),
			
 
				-				KM_SLEEP|KM_NOFS);
			
 
				-
			
 
				-		new_lv->lv_item = lidp->lid_item;
			
 
				-		new_lv->lv_niovecs = niovecs;
			
 
				-		if (ordered) {
			
 
				-			/* track as an ordered logvec */
			
 
				-			new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
			
 
				-			goto next;
			
 
				-		}
			
 
				-
			
 
				-		/* The allocated iovec region lies beyond the log vector. */
			
 
				-		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
			
 
				+		/* grab the old item if it exists for reservation accounting */
			
 
				+		old_lv = lip->li_lv;
			
 
				 
			
 
				-		/* build the vector array and calculate it's length */
			
 
				-		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
			
 
				-		for (index = 0; index < new_lv->lv_niovecs; index++)
			
 
				-			len += new_lv->lv_iovecp[index].i_len;
			
 
				+		/* calc buffer size */
			
 
				+		buf_size = sizeof(struct xfs_log_vec) + nbytes +
			
 
				+				niovecs * sizeof(struct xfs_log_iovec);
			
 
				 
			
 
				-		new_lv->lv_buf_len = len;
			
 
				-		new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
			
 
				-				KM_SLEEP|KM_NOFS);
			
 
				-		ptr = new_lv->lv_buf;
			
 
				+		/* compare to existing item size */
			
 
				+		if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
			
 
				+			/* same or smaller, optimise common overwrite case */
			
 
				+			lv = lip->li_lv;
			
 
				+			lv->lv_next = NULL;
			
 
				 
			
 
				-		for (index = 0; index < new_lv->lv_niovecs; index++) {
			
 
				-			struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
			
 
				+			if (ordered)
			
 
				+				goto insert;
			
 
				 
			
 
				-			memcpy(ptr, vec->i_addr, vec->i_len);
			
 
				-			vec->i_addr = ptr;
			
 
				-			ptr += vec->i_len;
			
 
				-		}
			
 
				-		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
			
 
				-
			
 
				-next:
			
 
				-		if (!ret_lv)
			
 
				-			ret_lv = new_lv;
			
 
				-		else
			
 
				-			lv->lv_next = new_lv;
			
 
				-		lv = new_lv;
			
 
				-	}
			
 
				-
			
 
				-	return ret_lv;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Prepare the log item for insertion into the CIL. Calculate the difference in
			
 
				- * log space and vectors it will consume, and if it is a new item pin it as
			
 
				- * well.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_cil_prepare_item(
			
 
				-	struct xlog		*log,
			
 
				-	struct xfs_log_vec	*lv,
			
 
				-	int			*len,
			
 
				-	int			*diff_iovecs)
			
 
				-{
			
 
				-	struct xfs_log_vec	*old = lv->lv_item->li_lv;
			
 
				+			/*
			
 
				+			 * set the item up as though it is a new insertion so
			
 
				+			 * that the space reservation accounting is correct.
			
 
				+			 */
			
 
				+			*diff_iovecs -= lv->lv_niovecs;
			
 
				+			*diff_len -= lv->lv_buf_len;
			
 
				 
			
 
				-	if (old) {
			
 
				-		/* existing lv on log item, space used is a delta */
			
 
				-		ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
			
 
				-			old->lv_buf_len == XFS_LOG_VEC_ORDERED);
			
 
				+			/* Ensure the lv is set up according to ->iop_size */
			
 
				+			lv->lv_niovecs = niovecs;
			
 
				+			lv->lv_buf = (char *)lv + buf_size - nbytes;
			
 
				 
			
 
				-		/*
			
 
				-		 * If the new item is ordered, keep the old one that is already
			
 
				-		 * tracking dirty or ordered regions
			
 
				-		 */
			
 
				-		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
			
 
				-			ASSERT(!lv->lv_buf);
			
 
				-			kmem_free(lv);
			
 
				-			return;
			
 
				+			lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
			
 
				+			goto insert;
			
 
				 		}
			
 
				 
			
 
				-		*len += lv->lv_buf_len - old->lv_buf_len;
			
 
				-		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
			
 
				-		kmem_free(old->lv_buf);
			
 
				-		kmem_free(old);
			
 
				-	} else {
			
 
				-		/* new lv, must pin the log item */
			
 
				-		ASSERT(!lv->lv_item->li_lv);
			
 
				-
			
 
				-		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
			
 
				-			*len += lv->lv_buf_len;
			
 
				-			*diff_iovecs += lv->lv_niovecs;
			
 
				+		/* allocate new data chunk */
			
 
				+		lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
			
 
				+		lv->lv_item = lip;
			
 
				+		lv->lv_size = buf_size;
			
 
				+		lv->lv_niovecs = niovecs;
			
 
				+		if (ordered) {
			
 
				+			/* track as an ordered logvec */
			
 
				+			ASSERT(lip->li_lv == NULL);
			
 
				+			lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
			
 
				+			goto insert;
			
 
				 		}
			
 
				-		IOP_PIN(lv->lv_item);
			
 
				 
			
 
				-	}
			
 
				+		/* The allocated iovec region lies beyond the log vector. */
			
 
				+		lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
			
 
				 
			
 
				-	/* attach new log vector to log item */
			
 
				-	lv->lv_item->li_lv = lv;
			
 
				+		/* The allocated data region lies beyond the iovec region */
			
 
				+		lv->lv_buf = (char *)lv + buf_size - nbytes;
			
 
				 
			
 
				-	/*
			
 
				-	 * If this is the first time the item is being committed to the
			
 
				-	 * CIL, store the sequence number on the log item so we can
			
 
				-	 * tell in future commits whether this is the first checkpoint
			
 
				-	 * the item is being committed into.
			
 
				-	 */
			
 
				-	if (!lv->lv_item->li_seq)
			
 
				-		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
			
 
				+		lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
			
 
				+insert:
			
 
				+		ASSERT(lv->lv_buf_len <= nbytes);
			
 
				+		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -261,53 +296,47 @@ xfs_cil_prepare_item(
 
				 static void
			
 
				 xlog_cil_insert_items(
			
 
				 	struct xlog		*log,
			
 
				-	struct xfs_log_vec	*log_vector,
			
 
				-	struct xlog_ticket	*ticket)
			
 
				+	struct xfs_trans	*tp)
			
 
				 {
			
 
				 	struct xfs_cil		*cil = log->l_cilp;
			
 
				 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
			
 
				-	struct xfs_log_vec	*lv;
			
 
				+	struct xfs_log_item_desc *lidp;
			
 
				 	int			len = 0;
			
 
				 	int			diff_iovecs = 0;
			
 
				 	int			iclog_space;
			
 
				 
			
 
				-	ASSERT(log_vector);
			
 
				+	ASSERT(tp);
			
 
				 
			
 
				 	/*
			
 
				-	 * Do all the accounting aggregation and switching of log vectors
			
 
				-	 * around in a separate loop to the insertion of items into the CIL.
			
 
				-	 * Then we can do a separate loop to update the CIL within a single
			
 
				-	 * lock/unlock pair. This reduces the number of round trips on the CIL
			
 
				-	 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
			
 
				-	 * hold time for the transaction commit.
			
 
				-	 *
			
 
				-	 * If this is the first time the item is being placed into the CIL in
			
 
				-	 * this context, pin it so it can't be written to disk until the CIL is
			
 
				-	 * flushed to the iclog and the iclog written to disk.
			
 
				-	 *
			
 
				 	 * We can do this safely because the context can't checkpoint until we
			
 
				 	 * are done so it doesn't matter exactly how we update the CIL.
			
 
				 	 */
			
 
				+	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
			
 
				+
			
 
				+	/*
			
 
				+	 * Now (re-)position everything modified at the tail of the CIL.
			
 
				+	 * We do this here so we only need to take the CIL lock once during
			
 
				+	 * the transaction commit.
			
 
				+	 */
			
 
				 	spin_lock(&cil->xc_cil_lock);
			
 
				-	for (lv = log_vector; lv; ) {
			
 
				-		struct xfs_log_vec *next = lv->lv_next;
			
 
				+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
			
 
				+		struct xfs_log_item	*lip = lidp->lid_item;
			
 
				 
			
 
				-		ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
			
 
				-		lv->lv_next = NULL;
			
 
				+		/* Skip items which aren't dirty in this transaction. */
			
 
				+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
			
 
				+			continue;
			
 
				 
			
 
				-		/*
			
 
				-		 * xfs_cil_prepare_item() may free the lv, so move the item on
			
 
				-		 * the CIL first.
			
 
				-		 */
			
 
				-		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
			
 
				-		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
			
 
				-		lv = next;
			
 
				+		list_move_tail(&lip->li_cil, &cil->xc_cil);
			
 
				 	}
			
 
				 
			
 
				 	/* account for space used by new iovec headers  */
			
 
				 	len += diff_iovecs * sizeof(xlog_op_header_t);
			
 
				 	ctx->nvecs += diff_iovecs;
			
 
				 
			
 
				+	/* attach the transaction to the CIL if it has any busy extents */
			
 
				+	if (!list_empty(&tp->t_busy))
			
 
				+		list_splice_init(&tp->t_busy, &ctx->busy_extents);
			
 
				+
			
 
				 	/*
			
 
				 	 * Now transfer enough transaction reservation to the context ticket
			
 
				 	 * for the checkpoint. The context ticket is special - the unit
			
@@ -316,10 +345,8 @@ xlog_cil_insert_items(
 
				 	 * during the transaction commit.
			
 
				 	 */
			
 
				 	if (ctx->ticket->t_curr_res == 0) {
			
 
				-		/* first commit in checkpoint, steal the header reservation */
			
 
				-		ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
			
 
				 		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
			
 
				-		ticket->t_curr_res -= ctx->ticket->t_unit_res;
			
 
				+		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
			
 
				 	}
			
 
				 
			
 
				 	/* do we need space for more log record headers? */
			
@@ -333,10 +360,10 @@ xlog_cil_insert_items(
 
				 		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
			
 
				 		ctx->ticket->t_unit_res += hdrs;
			
 
				 		ctx->ticket->t_curr_res += hdrs;
			
 
				-		ticket->t_curr_res -= hdrs;
			
 
				-		ASSERT(ticket->t_curr_res >= len);
			
 
				+		tp->t_ticket->t_curr_res -= hdrs;
			
 
				+		ASSERT(tp->t_ticket->t_curr_res >= len);
			
 
				 	}
			
 
				-	ticket->t_curr_res -= len;
			
 
				+	tp->t_ticket->t_curr_res -= len;
			
 
				 	ctx->space_used += len;
			
 
				 
			
 
				 	spin_unlock(&cil->xc_cil_lock);
			
@@ -350,7 +377,6 @@ xlog_cil_free_logvec(
 
				 
			
 
				 	for (lv = log_vector; lv; ) {
			
 
				 		struct xfs_log_vec *next = lv->lv_next;
			
 
				-		kmem_free(lv->lv_buf);
			
 
				 		kmem_free(lv);
			
 
				 		lv = next;
			
 
				 	}
			
@@ -376,9 +402,9 @@ xlog_cil_committed(
 
				 	xfs_extent_busy_clear(mp, &ctx->busy_extents,
			
 
				 			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
			
 
				 
			
 
				-	spin_lock(&ctx->cil->xc_cil_lock);
			
 
				+	spin_lock(&ctx->cil->xc_push_lock);
			
 
				 	list_del(&ctx->committing);
			
 
				-	spin_unlock(&ctx->cil->xc_cil_lock);
			
 
				+	spin_unlock(&ctx->cil->xc_push_lock);
			
 
				 
			
 
				 	xlog_cil_free_logvec(ctx->lv_chain);
			
 
				 
			
@@ -433,7 +459,7 @@ xlog_cil_push(
 
				 	down_write(&cil->xc_ctx_lock);
			
 
				 	ctx = cil->xc_ctx;
			
 
				 
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	push_seq = cil->xc_push_seq;
			
 
				 	ASSERT(push_seq <= ctx->sequence);
			
 
				 
			
@@ -444,10 +470,10 @@ xlog_cil_push(
 
				 	 */
			
 
				 	if (list_empty(&cil->xc_cil)) {
			
 
				 		cil->xc_push_seq = 0;
			
 
				-		spin_unlock(&cil->xc_cil_lock);
			
 
				+		spin_unlock(&cil->xc_push_lock);
			
 
				 		goto out_skip;
			
 
				 	}
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 
			
 
				 
			
 
				 	/* check for a previously pushed seqeunce */
			
@@ -515,9 +541,9 @@ xlog_cil_push(
 
				 	 * that higher sequences will wait for us to write out a commit record
			
 
				 	 * before they do.
			
 
				 	 */
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	list_add(&ctx->committing, &cil->xc_committing);
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 	up_write(&cil->xc_ctx_lock);
			
 
				 
			
 
				 	/*
			
@@ -552,7 +578,7 @@ xlog_cil_push(
 
				 	 * order the commit records so replay will get them in the right order.
			
 
				 	 */
			
 
				 restart:
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
			
 
				 		/*
			
 
				 		 * Higher sequences will wait for this one so skip them.
			
@@ -565,11 +591,11 @@ restart:
 
				 			 * It is still being pushed! Wait for the push to
			
 
				 			 * complete, then start again from the beginning.
			
 
				 			 */
			
 
				-			xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
			
 
				+			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
			
 
				 			goto restart;
			
 
				 		}
			
 
				 	}
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 
			
 
				 	/* xfs_log_done always frees the ticket on error. */
			
 
				 	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
			
@@ -588,10 +614,10 @@ restart:
 
				 	 * callbacks to the iclog we can assign the commit LSN to the context
			
 
				 	 * and wake up anyone who is waiting for the commit to complete.
			
 
				 	 */
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	ctx->commit_lsn = commit_lsn;
			
 
				 	wake_up_all(&cil->xc_commit_wait);
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 
			
 
				 	/* release the hounds! */
			
 
				 	return xfs_log_release_iclog(log->l_mp, commit_iclog);
			
@@ -644,12 +670,12 @@ xlog_cil_push_background(
 
				 	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
			
 
				 		return;
			
 
				 
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	if (cil->xc_push_seq < cil->xc_current_sequence) {
			
 
				 		cil->xc_push_seq = cil->xc_current_sequence;
			
 
				 		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
			
 
				 	}
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 
			
 
				 }
			
 
				 
			
@@ -672,14 +698,14 @@ xlog_cil_push_foreground(
 
				 	 * If the CIL is empty or we've already pushed the sequence then
			
 
				 	 * there's no work we need to do.
			
 
				 	 */
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
			
 
				-		spin_unlock(&cil->xc_cil_lock);
			
 
				+		spin_unlock(&cil->xc_push_lock);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	cil->xc_push_seq = push_seq;
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 
			
 
				 	/* do the push now */
			
 
				 	xlog_cil_push(log);
			
@@ -706,43 +732,25 @@ xfs_log_commit_cil(
 
				 	int			flags)
			
 
				 {
			
 
				 	struct xlog		*log = mp->m_log;
			
 
				+	struct xfs_cil		*cil = log->l_cilp;
			
 
				 	int			log_flags = 0;
			
 
				-	struct xfs_log_vec	*log_vector;
			
 
				 
			
 
				 	if (flags & XFS_TRANS_RELEASE_LOG_RES)
			
 
				 		log_flags = XFS_LOG_REL_PERM_RESERV;
			
 
				 
			
 
				-	/*
			
 
				-	 * Do all the hard work of formatting items (including memory
			
 
				-	 * allocation) outside the CIL context lock. This prevents stalling CIL
			
 
				-	 * pushes when we are low on memory and a transaction commit spends a
			
 
				-	 * lot of time in memory reclaim.
			
 
				-	 */
			
 
				-	log_vector = xlog_cil_prepare_log_vecs(tp);
			
 
				-	if (!log_vector)
			
 
				-		return ENOMEM;
			
 
				-
			
 
				 	/* lock out background commit */
			
 
				-	down_read(&log->l_cilp->xc_ctx_lock);
			
 
				-	if (commit_lsn)
			
 
				-		*commit_lsn = log->l_cilp->xc_ctx->sequence;
			
 
				+	down_read(&cil->xc_ctx_lock);
			
 
				 
			
 
				-	/* xlog_cil_insert_items() destroys log_vector list */
			
 
				-	xlog_cil_insert_items(log, log_vector, tp->t_ticket);
			
 
				+	xlog_cil_insert_items(log, tp);
			
 
				 
			
 
				 	/* check we didn't blow the reservation */
			
 
				 	if (tp->t_ticket->t_curr_res < 0)
			
 
				-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
			
 
				+		xlog_print_tic_res(mp, tp->t_ticket);
			
 
				 
			
 
				-	/* attach the transaction to the CIL if it has any busy extents */
			
 
				-	if (!list_empty(&tp->t_busy)) {
			
 
				-		spin_lock(&log->l_cilp->xc_cil_lock);
			
 
				-		list_splice_init(&tp->t_busy,
			
 
				-					&log->l_cilp->xc_ctx->busy_extents);
			
 
				-		spin_unlock(&log->l_cilp->xc_cil_lock);
			
 
				-	}
			
 
				+	tp->t_commit_lsn = cil->xc_ctx->sequence;
			
 
				+	if (commit_lsn)
			
 
				+		*commit_lsn = tp->t_commit_lsn;
			
 
				 
			
 
				-	tp->t_commit_lsn = *commit_lsn;
			
 
				 	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
			
 
				 	xfs_trans_unreserve_and_mod_sb(tp);
			
 
				 
			
@@ -757,11 +765,11 @@ xfs_log_commit_cil(
 
				 	 * the log items. This affects (at least) processing of stale buffers,
			
 
				 	 * inodes and EFIs.
			
 
				 	 */
			
 
				-	xfs_trans_free_items(tp, *commit_lsn, 0);
			
 
				+	xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
			
 
				 
			
 
				 	xlog_cil_push_background(log);
			
 
				 
			
 
				-	up_read(&log->l_cilp->xc_ctx_lock);
			
 
				+	up_read(&cil->xc_ctx_lock);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -800,7 +808,7 @@ xlog_cil_force_lsn(
 
				 	 * on commits for those as well.
			
 
				 	 */
			
 
				 restart:
			
 
				-	spin_lock(&cil->xc_cil_lock);
			
 
				+	spin_lock(&cil->xc_push_lock);
			
 
				 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
			
 
				 		if (ctx->sequence > sequence)
			
 
				 			continue;
			
@@ -809,7 +817,7 @@ restart:
 
				 			 * It is still being pushed! Wait for the push to
			
 
				 			 * complete, then start again from the beginning.
			
 
				 			 */
			
 
				-			xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
			
 
				+			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
			
 
				 			goto restart;
			
 
				 		}
			
 
				 		if (ctx->sequence != sequence)
			
@@ -817,7 +825,7 @@ restart:
 
				 		/* found it! */
			
 
				 		commit_lsn = ctx->commit_lsn;
			
 
				 	}
			
 
				-	spin_unlock(&cil->xc_cil_lock);
			
 
				+	spin_unlock(&cil->xc_push_lock);
			
 
				 	return commit_lsn;
			
 
				 }
			
 
				 
			
@@ -875,6 +883,7 @@ xlog_cil_init(
 
				 	INIT_LIST_HEAD(&cil->xc_cil);
			
 
				 	INIT_LIST_HEAD(&cil->xc_committing);
			
 
				 	spin_lock_init(&cil->xc_cil_lock);
			
 
				+	spin_lock_init(&cil->xc_push_lock);
			
 
				 	init_rwsem(&cil->xc_ctx_lock);
			
 
				 	init_waitqueue_head(&cil->xc_commit_wait);
			
 
				 
			
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/xfs_log_format.h
@@ -0,0 +1,852 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef	__XFS_LOG_FORMAT_H__
			
 
				+#define __XFS_LOG_FORMAT_H__
			
 
				+
			
 
				+struct xfs_mount;
			
 
				+struct xfs_trans_res;
			
 
				+
			
 
				+/*
			
 
				+ * On-disk Log Format definitions.
			
 
				+ *
			
 
				+ * This file contains all the on-disk format definitions used within the log. It
			
 
				+ * includes the physical log structure itself, as well as all the log item
			
 
				+ * format structures that are written into the log and interpreted by log
			
 
				+ * recovery. We start with the physical log format definitions, and then work
			
 
				+ * through all the log items definitions and everything they encode into the
			
 
				+ * log.
			
 
				+ */
			
 
				+typedef __uint32_t xlog_tid_t;
			
 
				+
			
 
				+#define XLOG_MIN_ICLOGS		2
			
 
				+#define XLOG_MAX_ICLOGS		8
			
 
				+#define XLOG_HEADER_MAGIC_NUM	0xFEEDbabe	/* Invalid cycle number */
			
 
				+#define XLOG_VERSION_1		1
			
 
				+#define XLOG_VERSION_2		2		/* Large IClogs, Log sunit */
			
 
				+#define XLOG_VERSION_OKBITS	(XLOG_VERSION_1 | XLOG_VERSION_2)
			
 
				+#define XLOG_MIN_RECORD_BSIZE	(16*1024)	/* eventually 32k */
			
 
				+#define XLOG_BIG_RECORD_BSIZE	(32*1024)	/* 32k buffers */
			
 
				+#define XLOG_MAX_RECORD_BSIZE	(256*1024)
			
 
				+#define XLOG_HEADER_CYCLE_SIZE	(32*1024)	/* cycle data in header */
			
 
				+#define XLOG_MIN_RECORD_BSHIFT	14		/* 16384 == 1 << 14 */
			
 
				+#define XLOG_BIG_RECORD_BSHIFT	15		/* 32k == 1 << 15 */
			
 
				+#define XLOG_MAX_RECORD_BSHIFT	18		/* 256k == 1 << 18 */
			
 
				+#define XLOG_BTOLSUNIT(log, b)  (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \
			
 
				+                                 (log)->l_mp->m_sb.sb_logsunit)
			
 
				+#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit)
			
 
				+
			
 
				+#define XLOG_HEADER_SIZE	512
			
 
				+
			
 
				+/* Minimum number of transactions that must fit in the log (defined by mkfs) */
			
 
				+#define XFS_MIN_LOG_FACTOR	3
			
 
				+
			
 
				+#define XLOG_REC_SHIFT(log) \
			
 
				+	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
			
 
				+	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
			
 
				+#define XLOG_TOTAL_REC_SHIFT(log) \
			
 
				+	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
			
 
				+	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
			
 
				+
			
 
				+/* get lsn fields */
			
 
				+#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
			
 
				+#define BLOCK_LSN(lsn) ((uint)(lsn))
			
 
				+
			
 
				+/* this is used in a spot where we might otherwise double-endian-flip */
			
 
				+#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
			
 
				+
			
 
				+static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
			
 
				+{
			
 
				+	return ((xfs_lsn_t)cycle << 32) | block;
			
 
				+}
			
 
				+
			
 
				+static inline uint xlog_get_cycle(char *ptr)
			
 
				+{
			
 
				+	if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
			
 
				+		return be32_to_cpu(*((__be32 *)ptr + 1));
			
 
				+	else
			
 
				+		return be32_to_cpu(*(__be32 *)ptr);
			
 
				+}
			
 
				+
			
 
				+/* Log Clients */
			
 
				+#define XFS_TRANSACTION		0x69
			
 
				+#define XFS_VOLUME		0x2
			
 
				+#define XFS_LOG			0xaa
			
 
				+
			
 
				+#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
			
 
				+
			
 
				+/* Region types for iovec's i_type */
			
 
				+#define XLOG_REG_TYPE_BFORMAT		1
			
 
				+#define XLOG_REG_TYPE_BCHUNK		2
			
 
				+#define XLOG_REG_TYPE_EFI_FORMAT	3
			
 
				+#define XLOG_REG_TYPE_EFD_FORMAT	4
			
 
				+#define XLOG_REG_TYPE_IFORMAT		5
			
 
				+#define XLOG_REG_TYPE_ICORE		6
			
 
				+#define XLOG_REG_TYPE_IEXT		7
			
 
				+#define XLOG_REG_TYPE_IBROOT		8
			
 
				+#define XLOG_REG_TYPE_ILOCAL		9
			
 
				+#define XLOG_REG_TYPE_IATTR_EXT		10
			
 
				+#define XLOG_REG_TYPE_IATTR_BROOT	11
			
 
				+#define XLOG_REG_TYPE_IATTR_LOCAL	12
			
 
				+#define XLOG_REG_TYPE_QFORMAT		13
			
 
				+#define XLOG_REG_TYPE_DQUOT		14
			
 
				+#define XLOG_REG_TYPE_QUOTAOFF		15
			
 
				+#define XLOG_REG_TYPE_LRHEADER		16
			
 
				+#define XLOG_REG_TYPE_UNMOUNT		17
			
 
				+#define XLOG_REG_TYPE_COMMIT		18
			
 
				+#define XLOG_REG_TYPE_TRANSHDR		19
			
 
				+#define XLOG_REG_TYPE_ICREATE		20
			
 
				+#define XLOG_REG_TYPE_MAX		20
			
 
				+
			
 
				+/*
			
 
				+ * Flags to log operation header
			
 
				+ *
			
 
				+ * The first write of a new transaction will be preceded with a start
			
 
				+ * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
			
 
				+ * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
			
 
				+ * the remainder of the current active in-core log, it is split up into
			
 
				+ * multiple regions.  Each partial region will be marked with a
			
 
				+ * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
			
 
				+ *
			
 
				+ */
			
 
				+#define XLOG_START_TRANS	0x01	/* Start a new transaction */
			
 
				+#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
			
 
				+#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
			
 
				+#define XLOG_WAS_CONT_TRANS	0x08	/* Cont this trans into new region */
			
 
				+#define XLOG_END_TRANS		0x10	/* End a continued transaction */
			
 
				+#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */
			
 
				+
			
 
				+
			
 
				+typedef struct xlog_op_header {
			
 
				+	__be32	   oh_tid;	/* transaction id of operation	:  4 b */
			
 
				+	__be32	   oh_len;	/* bytes in data region		:  4 b */
			
 
				+	__u8	   oh_clientid;	/* who sent me this		:  1 b */
			
 
				+	__u8	   oh_flags;	/*				:  1 b */
			
 
				+	__u16	   oh_res2;	/* 32 bit align			:  2 b */
			
 
				+} xlog_op_header_t;
			
 
				+
			
 
				+/* valid values for h_fmt */
			
 
				+#define XLOG_FMT_UNKNOWN  0
			
 
				+#define XLOG_FMT_LINUX_LE 1
			
 
				+#define XLOG_FMT_LINUX_BE 2
			
 
				+#define XLOG_FMT_IRIX_BE  3
			
 
				+
			
 
				+/* our fmt */
			
 
				+#ifdef XFS_NATIVE_HOST
			
 
				+#define XLOG_FMT XLOG_FMT_LINUX_BE
			
 
				+#else
			
 
				+#define XLOG_FMT XLOG_FMT_LINUX_LE
			
 
				+#endif
			
 
				+
			
 
				+typedef struct xlog_rec_header {
			
 
				+	__be32	  h_magicno;	/* log record (LR) identifier		:  4 */
			
 
				+	__be32	  h_cycle;	/* write cycle of log			:  4 */
			
 
				+	__be32	  h_version;	/* LR version				:  4 */
			
 
				+	__be32	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
			
 
				+	__be64	  h_lsn;	/* lsn of this LR			:  8 */
			
 
				+	__be64	  h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
			
 
				+	__le32	  h_crc;	/* crc of log record                    :  4 */
			
 
				+	__be32	  h_prev_block; /* block number to previous LR		:  4 */
			
 
				+	__be32	  h_num_logops;	/* number of log operations in this LR	:  4 */
			
 
				+	__be32	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
			
 
				+	/* new fields */
			
 
				+	__be32    h_fmt;        /* format of log record                 :  4 */
			
 
				+	uuid_t	  h_fs_uuid;    /* uuid of FS                           : 16 */
			
 
				+	__be32	  h_size;	/* iclog size				:  4 */
			
 
				+} xlog_rec_header_t;
			
 
				+
			
 
				+typedef struct xlog_rec_ext_header {
			
 
				+	__be32	  xh_cycle;	/* write cycle of log			: 4 */
			
 
				+	__be32	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
			
 
				+} xlog_rec_ext_header_t;
			
 
				+
			
 
				+/*
			
 
				+ * Quite misnamed, because this union lays out the actual on-disk log buffer.
			
 
				+ */
			
 
				+typedef union xlog_in_core2 {
			
 
				+	xlog_rec_header_t	hic_header;
			
 
				+	xlog_rec_ext_header_t	hic_xheader;
			
 
				+	char			hic_sector[XLOG_HEADER_SIZE];
			
 
				+} xlog_in_core_2_t;
			
 
				+
			
 
				+/* not an on-disk structure, but needed by log recovery in userspace */
			
 
				+typedef struct xfs_log_iovec {
			
 
				+	void		*i_addr;	/* beginning address of region */
			
 
				+	int		i_len;		/* length in bytes of region */
			
 
				+	uint		i_type;		/* type of region */
			
 
				+} xfs_log_iovec_t;
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Transaction Header definitions.
			
 
				+ *
			
 
				+ * This is the structure written in the log at the head of every transaction. It
			
 
				+ * identifies the type and id of the transaction, and contains the number of
			
 
				+ * items logged by the transaction so we know how many to expect during
			
 
				+ * recovery.
			
 
				+ *
			
 
				+ * Do not change the below structure without redoing the code in
			
 
				+ * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
			
 
				+ */
			
 
				+typedef struct xfs_trans_header {
			
 
				+	uint		th_magic;		/* magic number */
			
 
				+	uint		th_type;		/* transaction type */
			
 
				+	__int32_t	th_tid;			/* transaction id (unused) */
			
 
				+	uint		th_num_items;		/* num items logged by trans */
			
 
				+} xfs_trans_header_t;
			
 
				+
			
 
				+#define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* TRAN */
			
 
				+
			
 
				+/*
			
 
				+ * Log item types.
			
 
				+ */
			
 
				+#define	XFS_LI_EFI		0x1236
			
 
				+#define	XFS_LI_EFD		0x1237
			
 
				+#define	XFS_LI_IUNLINK		0x1238
			
 
				+#define	XFS_LI_INODE		0x123b	/* aligned ino chunks, var-size ibufs */
			
 
				+#define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
			
 
				+#define	XFS_LI_DQUOT		0x123d
			
 
				+#define	XFS_LI_QUOTAOFF		0x123e
			
 
				+#define	XFS_LI_ICREATE		0x123f
			
 
				+
			
 
				+#define XFS_LI_TYPE_DESC \
			
 
				+	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
			
 
				+	{ XFS_LI_EFD,		"XFS_LI_EFD" }, \
			
 
				+	{ XFS_LI_IUNLINK,	"XFS_LI_IUNLINK" }, \
			
 
				+	{ XFS_LI_INODE,		"XFS_LI_INODE" }, \
			
 
				+	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
			
 
				+	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
			
 
				+	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }, \
			
 
				+	{ XFS_LI_ICREATE,	"XFS_LI_ICREATE" }
			
 
				+
			
 
				+/*
			
 
				+ * Transaction types.  Used to distinguish types of buffers.
			
 
				+ */
			
 
				+#define XFS_TRANS_SETATTR_NOT_SIZE	1
			
 
				+#define XFS_TRANS_SETATTR_SIZE		2
			
 
				+#define XFS_TRANS_INACTIVE		3
			
 
				+#define XFS_TRANS_CREATE		4
			
 
				+#define XFS_TRANS_CREATE_TRUNC		5
			
 
				+#define XFS_TRANS_TRUNCATE_FILE		6
			
 
				+#define XFS_TRANS_REMOVE		7
			
 
				+#define XFS_TRANS_LINK			8
			
 
				+#define XFS_TRANS_RENAME		9
			
 
				+#define XFS_TRANS_MKDIR			10
			
 
				+#define XFS_TRANS_RMDIR			11
			
 
				+#define XFS_TRANS_SYMLINK		12
			
 
				+#define XFS_TRANS_SET_DMATTRS		13
			
 
				+#define XFS_TRANS_GROWFS		14
			
 
				+#define XFS_TRANS_STRAT_WRITE		15
			
 
				+#define XFS_TRANS_DIOSTRAT		16
			
 
				+/* 17 was XFS_TRANS_WRITE_SYNC */
			
 
				+#define	XFS_TRANS_WRITEID		18
			
 
				+#define	XFS_TRANS_ADDAFORK		19
			
 
				+#define	XFS_TRANS_ATTRINVAL		20
			
 
				+#define	XFS_TRANS_ATRUNCATE		21
			
 
				+#define	XFS_TRANS_ATTR_SET		22
			
 
				+#define	XFS_TRANS_ATTR_RM		23
			
 
				+#define	XFS_TRANS_ATTR_FLAG		24
			
 
				+#define	XFS_TRANS_CLEAR_AGI_BUCKET	25
			
 
				+#define XFS_TRANS_QM_SBCHANGE		26
			
 
				+/*
			
 
				+ * Dummy entries since we use the transaction type to index into the
			
 
				+ * trans_type[] in xlog_recover_print_trans_head()
			
 
				+ */
			
 
				+#define XFS_TRANS_DUMMY1		27
			
 
				+#define XFS_TRANS_DUMMY2		28
			
 
				+#define XFS_TRANS_QM_QUOTAOFF		29
			
 
				+#define XFS_TRANS_QM_DQALLOC		30
			
 
				+#define XFS_TRANS_QM_SETQLIM		31
			
 
				+#define XFS_TRANS_QM_DQCLUSTER		32
			
 
				+#define XFS_TRANS_QM_QINOCREATE		33
			
 
				+#define XFS_TRANS_QM_QUOTAOFF_END	34
			
 
				+#define XFS_TRANS_SB_UNIT		35
			
 
				+#define XFS_TRANS_FSYNC_TS		36
			
 
				+#define	XFS_TRANS_GROWFSRT_ALLOC	37
			
 
				+#define	XFS_TRANS_GROWFSRT_ZERO		38
			
 
				+#define	XFS_TRANS_GROWFSRT_FREE		39
			
 
				+#define	XFS_TRANS_SWAPEXT		40
			
 
				+#define	XFS_TRANS_SB_COUNT		41
			
 
				+#define	XFS_TRANS_CHECKPOINT		42
			
 
				+#define	XFS_TRANS_ICREATE		43
			
 
				+#define	XFS_TRANS_TYPE_MAX		43
			
 
				+/* new transaction types need to be reflected in xfs_logprint(8) */
			
 
				+
			
 
				+#define XFS_TRANS_TYPES \
			
 
				+	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \
			
 
				+	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \
			
 
				+	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \
			
 
				+	{ XFS_TRANS_CREATE,		"CREATE" }, \
			
 
				+	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \
			
 
				+	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \
			
 
				+	{ XFS_TRANS_REMOVE,		"REMOVE" }, \
			
 
				+	{ XFS_TRANS_LINK,		"LINK" }, \
			
 
				+	{ XFS_TRANS_RENAME,		"RENAME" }, \
			
 
				+	{ XFS_TRANS_MKDIR,		"MKDIR" }, \
			
 
				+	{ XFS_TRANS_RMDIR,		"RMDIR" }, \
			
 
				+	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \
			
 
				+	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \
			
 
				+	{ XFS_TRANS_GROWFS,		"GROWFS" }, \
			
 
				+	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \
			
 
				+	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \
			
 
				+	{ XFS_TRANS_WRITEID,		"WRITEID" }, \
			
 
				+	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \
			
 
				+	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \
			
 
				+	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \
			
 
				+	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \
			
 
				+	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \
			
 
				+	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \
			
 
				+	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \
			
 
				+	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \
			
 
				+	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \
			
 
				+	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \
			
 
				+	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \
			
 
				+	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \
			
 
				+	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \
			
 
				+	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \
			
 
				+	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \
			
 
				+	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \
			
 
				+	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \
			
 
				+	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \
			
 
				+	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \
			
 
				+	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \
			
 
				+	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \
			
 
				+	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \
			
 
				+	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \
			
 
				+	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \
			
 
				+	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" }
			
 
				+
			
 
				+/*
			
 
				+ * This structure is used to track log items associated with
			
 
				+ * a transaction.  It points to the log item and keeps some
			
 
				+ * flags to track the state of the log item.  It also tracks
			
 
				+ * the amount of space needed to log the item it describes
			
 
				+ * once we get to commit processing (see xfs_trans_commit()).
			
 
				+ */
			
 
				+struct xfs_log_item_desc {
			
 
				+	struct xfs_log_item	*lid_item;
			
 
				+	struct list_head	lid_trans;
			
 
				+	unsigned char		lid_flags;
			
 
				+};
			
 
				+
			
 
				+#define XFS_LID_DIRTY		0x1
			
 
				+
			
 
				+/*
			
 
				+ * Values for t_flags.
			
 
				+ */
			
 
				+#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */
			
 
				+#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
			
 
				+#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
			
 
				+#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
			
 
				+#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
			
 
				+#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
			
 
				+#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer
			
 
				+					   count in superblock */
			
 
				+
			
 
				+/*
			
 
				+ * Values for call flags parameter.
			
 
				+ */
			
 
				+#define	XFS_TRANS_RELEASE_LOG_RES	0x4
			
 
				+#define	XFS_TRANS_ABORT			0x8
			
 
				+
			
 
				+/*
			
 
				+ * Field values for xfs_trans_mod_sb.
			
 
				+ */
			
 
				+#define	XFS_TRANS_SB_ICOUNT		0x00000001
			
 
				+#define	XFS_TRANS_SB_IFREE		0x00000002
			
 
				+#define	XFS_TRANS_SB_FDBLOCKS		0x00000004
			
 
				+#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008
			
 
				+#define	XFS_TRANS_SB_FREXTENTS		0x00000010
			
 
				+#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020
			
 
				+#define	XFS_TRANS_SB_DBLOCKS		0x00000040
			
 
				+#define	XFS_TRANS_SB_AGCOUNT		0x00000080
			
 
				+#define	XFS_TRANS_SB_IMAXPCT		0x00000100
			
 
				+#define	XFS_TRANS_SB_REXTSIZE		0x00000200
			
 
				+#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400
			
 
				+#define	XFS_TRANS_SB_RBLOCKS		0x00000800
			
 
				+#define	XFS_TRANS_SB_REXTENTS		0x00001000
			
 
				+#define	XFS_TRANS_SB_REXTSLOG		0x00002000
			
 
				+
			
 
				+/*
			
 
				+ * Here we centralize the specification of XFS meta-data buffer
			
 
				+ * reference count values.  This determines how hard the buffer
			
 
				+ * cache tries to hold onto the buffer.
			
 
				+ */
			
 
				+#define	XFS_AGF_REF		4
			
 
				+#define	XFS_AGI_REF		4
			
 
				+#define	XFS_AGFL_REF		3
			
 
				+#define	XFS_INO_BTREE_REF	3
			
 
				+#define	XFS_ALLOC_BTREE_REF	2
			
 
				+#define	XFS_BMAP_BTREE_REF	2
			
 
				+#define	XFS_DIR_BTREE_REF	2
			
 
				+#define	XFS_INO_REF		2
			
 
				+#define	XFS_ATTR_BTREE_REF	1
			
 
				+#define	XFS_DQUOT_REF		1
			
 
				+
			
 
				+/*
			
 
				+ * Flags for xfs_trans_ichgtime().
			
 
				+ */
			
 
				+#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
			
 
				+#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
			
 
				+#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Inode Log Item Format definitions.
			
 
				+ *
			
 
				+ * This is the structure used to lay out an inode log item in the
			
 
				+ * log.  The size of the inline data/extents/b-tree root to be logged
			
 
				+ * (if any) is indicated in the ilf_dsize field.  Changes to this structure
			
 
				+ * must be added on to the end.
			
 
				+ */
			
 
				+typedef struct xfs_inode_log_format {
			
 
				+	__uint16_t		ilf_type;	/* inode log item type */
			
 
				+	__uint16_t		ilf_size;	/* size of this item */
			
 
				+	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				+	__uint64_t		ilf_ino;	/* inode number */
			
 
				+	union {
			
 
				+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				+		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				+	} ilf_u;
			
 
				+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				+	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				+	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				+} xfs_inode_log_format_t;
			
 
				+
			
 
				+typedef struct xfs_inode_log_format_32 {
			
 
				+	__uint16_t		ilf_type;	/* inode log item type */
			
 
				+	__uint16_t		ilf_size;	/* size of this item */
			
 
				+	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				+	__uint64_t		ilf_ino;	/* inode number */
			
 
				+	union {
			
 
				+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				+		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				+	} ilf_u;
			
 
				+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				+	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				+	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				+} __attribute__((packed)) xfs_inode_log_format_32_t;
			
 
				+
			
 
				+typedef struct xfs_inode_log_format_64 {
			
 
				+	__uint16_t		ilf_type;	/* inode log item type */
			
 
				+	__uint16_t		ilf_size;	/* size of this item */
			
 
				+	__uint32_t		ilf_fields;	/* flags for fields logged */
			
 
				+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
			
 
				+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
			
 
				+	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
			
 
				+	__uint64_t		ilf_ino;	/* inode number */
			
 
				+	union {
			
 
				+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
			
 
				+		uuid_t		ilfu_uuid;	/* mount point value */
			
 
				+	} ilf_u;
			
 
				+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
			
 
				+	__int32_t		ilf_len;	/* len of inode buffer */
			
 
				+	__int32_t		ilf_boffset;	/* off of inode in buffer */
			
 
				+} xfs_inode_log_format_64_t;
			
 
				+
			
 
				+/*
			
 
				+ * Flags for xfs_trans_log_inode flags field.
			
 
				+ */
			
 
				+#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
			
 
				+#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_data */
			
 
				+#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
			
 
				+#define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
			
 
				+#define	XFS_ILOG_DEV	0x010	/* log the dev field */
			
 
				+#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
			
 
				+#define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
			
 
				+#define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
			
 
				+#define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * The timestamps are dirty, but not necessarily anything else in the inode
			
 
				+ * core.  Unlike the other fields above this one must never make it to disk
			
 
				+ * in the ilf_fields of the inode_log_format, but is purely stored in-memory in
			
 
				+ * ili_fields in the inode_log_item.
			
 
				+ */
			
 
				+#define XFS_ILOG_TIMESTAMP	0x4000
			
 
				+
			
 
				+#define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
			
 
				+				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
			
 
				+				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
			
 
				+				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
			
 
				+
			
 
				+#define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
			
 
				+				 XFS_ILOG_DBROOT)
			
 
				+
			
 
				+#define	XFS_ILOG_AFORK		(XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
			
 
				+				 XFS_ILOG_ABROOT)
			
 
				+
			
 
				+#define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
			
 
				+				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
			
 
				+				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
			
 
				+				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
			
 
				+				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
			
 
				+
			
 
				+static inline int xfs_ilog_fbroot(int w)
			
 
				+{
			
 
				+	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
			
 
				+}
			
 
				+
			
 
				+static inline int xfs_ilog_fext(int w)
			
 
				+{
			
 
				+	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
			
 
				+}
			
 
				+
			
 
				+static inline int xfs_ilog_fdata(int w)
			
 
				+{
			
 
				+	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Incore version of the on-disk inode core structures. We log this directly
			
 
				+ * into the journal in host CPU format (for better or worse) and as such
			
 
				+ * directly mirrors the xfs_dinode structure as it must contain all the same
			
 
				+ * information.
			
 
				+ */
			
 
				+typedef struct xfs_ictimestamp {
			
 
				+	__int32_t	t_sec;		/* timestamp seconds */
			
 
				+	__int32_t	t_nsec;		/* timestamp nanoseconds */
			
 
				+} xfs_ictimestamp_t;
			
 
				+
			
 
				+/*
			
 
				+ * NOTE:  This structure must be kept identical to struct xfs_dinode
			
 
				+ *	  in xfs_dinode.h except for the endianness annotations.
			
 
				+ */
			
 
				+typedef struct xfs_icdinode {
			
 
				+	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
			
 
				+	__uint16_t	di_mode;	/* mode and type of file */
			
 
				+	__int8_t	di_version;	/* inode version */
			
 
				+	__int8_t	di_format;	/* format of di_c data */
			
 
				+	__uint16_t	di_onlink;	/* old number of links to file */
			
 
				+	__uint32_t	di_uid;		/* owner's user id */
			
 
				+	__uint32_t	di_gid;		/* owner's group id */
			
 
				+	__uint32_t	di_nlink;	/* number of links to file */
			
 
				+	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
			
 
				+	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
			
 
				+	__uint8_t	di_pad[6];	/* unused, zeroed space */
			
 
				+	__uint16_t	di_flushiter;	/* incremented on flush */
			
 
				+	xfs_ictimestamp_t di_atime;	/* time last accessed */
			
 
				+	xfs_ictimestamp_t di_mtime;	/* time last modified */
			
 
				+	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
			
 
				+	xfs_fsize_t	di_size;	/* number of bytes in file */
			
 
				+	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
			
 
				+	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
			
 
				+	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
			
 
				+	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
			
 
				+	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
			
 
				+	__int8_t	di_aformat;	/* format of attr fork's data */
			
 
				+	__uint32_t	di_dmevmask;	/* DMIG event mask */
			
 
				+	__uint16_t	di_dmstate;	/* DMIG state info */
			
 
				+	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
			
 
				+	__uint32_t	di_gen;		/* generation number */
			
 
				+
			
 
				+	/* di_next_unlinked is the only non-core field in the old dinode */
			
 
				+	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
			
 
				+
			
 
				+	/* start of the extended dinode, writable fields */
			
 
				+	__uint32_t	di_crc;		/* CRC of the inode */
			
 
				+	__uint64_t	di_changecount;	/* number of attribute changes */
			
 
				+	xfs_lsn_t	di_lsn;		/* flush sequence */
			
 
				+	__uint64_t	di_flags2;	/* more random flags */
			
 
				+	__uint8_t	di_pad2[16];	/* more padding for future expansion */
			
 
				+
			
 
				+	/* fields only written to during inode creation */
			
 
				+	xfs_ictimestamp_t di_crtime;	/* time created */
			
 
				+	xfs_ino_t	di_ino;		/* inode number */
			
 
				+	uuid_t		di_uuid;	/* UUID of the filesystem */
			
 
				+
			
 
				+	/* structure must be padded to 64 bit alignment */
			
 
				+} xfs_icdinode_t;
			
 
				+
			
 
				+static inline uint xfs_icdinode_size(int version)
			
 
				+{
			
 
				+	if (version == 3)
			
 
				+		return sizeof(struct xfs_icdinode);
			
 
				+	return offsetof(struct xfs_icdinode, di_next_unlinked);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Buffer Log Format definitions
			
 
				+ *
			
 
				+ * These are the physical dirty bitmap definitions for the log format structure.
			
 
				+ */
			
 
				+#define	XFS_BLF_CHUNK		128
			
 
				+#define	XFS_BLF_SHIFT		7
			
 
				+#define	BIT_TO_WORD_SHIFT	5
			
 
				+#define	NBWORD			(NBBY * sizeof(unsigned int))
			
 
				+
			
 
				+/*
			
 
				+ * This flag indicates that the buffer contains on disk inodes
			
 
				+ * and requires special recovery handling.
			
 
				+ */
			
 
				+#define	XFS_BLF_INODE_BUF	(1<<0)
			
 
				+
			
 
				+/*
			
 
				+ * This flag indicates that the buffer should not be replayed
			
 
				+ * during recovery because its blocks are being freed.
			
 
				+ */
			
 
				+#define	XFS_BLF_CANCEL		(1<<1)
			
 
				+
			
 
				+/*
			
 
				+ * This flag indicates that the buffer contains on disk
			
 
				+ * user or group dquots and may require special recovery handling.
			
 
				+ */
			
 
				+#define	XFS_BLF_UDQUOT_BUF	(1<<2)
			
 
				+#define XFS_BLF_PDQUOT_BUF	(1<<3)
			
 
				+#define	XFS_BLF_GDQUOT_BUF	(1<<4)
			
 
				+
			
 
				+/*
			
 
				+ * This is the structure used to lay out a buf log item in the
			
 
				+ * log.  The data map describes which 128 byte chunks of the buffer
			
 
				+ * have been logged.
			
 
				+ */
			
 
				+#define XFS_BLF_DATAMAP_SIZE	((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
			
 
				+
			
 
				+typedef struct xfs_buf_log_format {
			
 
				+	unsigned short	blf_type;	/* buf log item type indicator */
			
 
				+	unsigned short	blf_size;	/* size of this item */
			
 
				+	ushort		blf_flags;	/* misc state */
			
 
				+	ushort		blf_len;	/* number of blocks in this buf */
			
 
				+	__int64_t	blf_blkno;	/* starting blkno of this buf */
			
 
				+	unsigned int	blf_map_size;	/* used size of data bitmap in words */
			
 
				+	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
			
 
				+} xfs_buf_log_format_t;
			
 
				+
			
 
				+/*
			
 
				+ * All buffers now need to tell recovery where the magic number
			
 
				+ * is so that it can verify and calculate the CRCs on the buffer correctly
			
 
				+ * once the changes have been replayed into the buffer.
			
 
				+ *
			
 
				+ * The type value is held in the upper 5 bits of the blf_flags field, which is
			
 
				+ * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
			
 
				+ */
			
 
				+#define XFS_BLFT_BITS	5
			
 
				+#define XFS_BLFT_SHIFT	11
			
 
				+#define XFS_BLFT_MASK	(((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
			
 
				+
			
 
				+enum xfs_blft {
			
 
				+	XFS_BLFT_UNKNOWN_BUF = 0,
			
 
				+	XFS_BLFT_UDQUOT_BUF,
			
 
				+	XFS_BLFT_PDQUOT_BUF,
			
 
				+	XFS_BLFT_GDQUOT_BUF,
			
 
				+	XFS_BLFT_BTREE_BUF,
			
 
				+	XFS_BLFT_AGF_BUF,
			
 
				+	XFS_BLFT_AGFL_BUF,
			
 
				+	XFS_BLFT_AGI_BUF,
			
 
				+	XFS_BLFT_DINO_BUF,
			
 
				+	XFS_BLFT_SYMLINK_BUF,
			
 
				+	XFS_BLFT_DIR_BLOCK_BUF,
			
 
				+	XFS_BLFT_DIR_DATA_BUF,
			
 
				+	XFS_BLFT_DIR_FREE_BUF,
			
 
				+	XFS_BLFT_DIR_LEAF1_BUF,
			
 
				+	XFS_BLFT_DIR_LEAFN_BUF,
			
 
				+	XFS_BLFT_DA_NODE_BUF,
			
 
				+	XFS_BLFT_ATTR_LEAF_BUF,
			
 
				+	XFS_BLFT_ATTR_RMT_BUF,
			
 
				+	XFS_BLFT_SB_BUF,
			
 
				+	XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
			
 
				+};
			
 
				+
			
 
				+static inline void
			
 
				+xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
			
 
				+{
			
 
				+	ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
			
 
				+	blf->blf_flags &= ~XFS_BLFT_MASK;
			
 
				+	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
			
 
				+}
			
 
				+
			
 
				+static inline __uint16_t
			
 
				+xfs_blft_from_flags(struct xfs_buf_log_format *blf)
			
 
				+{
			
 
				+	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * EFI/EFD log format definitions
			
 
				+ */
			
 
				+typedef struct xfs_extent {
			
 
				+	xfs_dfsbno_t	ext_start;
			
 
				+	xfs_extlen_t	ext_len;
			
 
				+} xfs_extent_t;
			
 
				+
			
 
				+/*
			
 
				+ * Since an xfs_extent_t has types (start:64, len: 32)
			
 
				+ * there are different alignments on 32 bit and 64 bit kernels.
			
 
				+ * So we provide the different variants for use by a
			
 
				+ * conversion routine.
			
 
				+ */
			
 
				+typedef struct xfs_extent_32 {
			
 
				+	__uint64_t	ext_start;
			
 
				+	__uint32_t	ext_len;
			
 
				+} __attribute__((packed)) xfs_extent_32_t;
			
 
				+
			
 
				+typedef struct xfs_extent_64 {
			
 
				+	__uint64_t	ext_start;
			
 
				+	__uint32_t	ext_len;
			
 
				+	__uint32_t	ext_pad;
			
 
				+} xfs_extent_64_t;
			
 
				+
			
 
				+/*
			
 
				+ * This is the structure used to lay out an efi log item in the
			
 
				+ * log.  The efi_extents field is a variable size array whose
			
 
				+ * size is given by efi_nextents.
			
 
				+ */
			
 
				+typedef struct xfs_efi_log_format {
			
 
				+	__uint16_t		efi_type;	/* efi log item type */
			
 
				+	__uint16_t		efi_size;	/* size of this item */
			
 
				+	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				+	__uint64_t		efi_id;		/* efi identifier */
			
 
				+	xfs_extent_t		efi_extents[1];	/* array of extents to free */
			
 
				+} xfs_efi_log_format_t;
			
 
				+
			
 
				+typedef struct xfs_efi_log_format_32 {
			
 
				+	__uint16_t		efi_type;	/* efi log item type */
			
 
				+	__uint16_t		efi_size;	/* size of this item */
			
 
				+	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				+	__uint64_t		efi_id;		/* efi identifier */
			
 
				+	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
			
 
				+} __attribute__((packed)) xfs_efi_log_format_32_t;
			
 
				+
			
 
				+typedef struct xfs_efi_log_format_64 {
			
 
				+	__uint16_t		efi_type;	/* efi log item type */
			
 
				+	__uint16_t		efi_size;	/* size of this item */
			
 
				+	__uint32_t		efi_nextents;	/* # extents to free */
			
 
				+	__uint64_t		efi_id;		/* efi identifier */
			
 
				+	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
			
 
				+} xfs_efi_log_format_64_t;
			
 
				+
			
 
				+/*
			
 
				+ * This is the structure used to lay out an efd log item in the
			
 
				+ * log.  The efd_extents array is a variable size array whose
			
 
				+ * size is given by efd_nextents.
			
 
				+ */
			
 
				+typedef struct xfs_efd_log_format {
			
 
				+	__uint16_t		efd_type;	/* efd log item type */
			
 
				+	__uint16_t		efd_size;	/* size of this item */
			
 
				+	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				+	xfs_extent_t		efd_extents[1];	/* array of extents freed */
			
 
				+} xfs_efd_log_format_t;
			
 
				+
			
 
				+typedef struct xfs_efd_log_format_32 {
			
 
				+	__uint16_t		efd_type;	/* efd log item type */
			
 
				+	__uint16_t		efd_size;	/* size of this item */
			
 
				+	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				+	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
			
 
				+} __attribute__((packed)) xfs_efd_log_format_32_t;
			
 
				+
			
 
				+typedef struct xfs_efd_log_format_64 {
			
 
				+	__uint16_t		efd_type;	/* efd log item type */
			
 
				+	__uint16_t		efd_size;	/* size of this item */
			
 
				+	__uint32_t		efd_nextents;	/* # of extents freed */
			
 
				+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
			
 
				+	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
			
 
				+} xfs_efd_log_format_64_t;
			
 
				+
			
 
				+/*
			
 
				+ * Dquot Log format definitions.
			
 
				+ *
			
 
				+ * The first two fields must be the type and size fitting into
			
 
				+ * 32 bits : log_recovery code assumes that.
			
 
				+ */
			
 
				+typedef struct xfs_dq_logformat {
			
 
				+	__uint16_t		qlf_type;      /* dquot log item type */
			
 
				+	__uint16_t		qlf_size;      /* size of this item */
			
 
				+	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
			
 
				+	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
			
 
				+	__int32_t		qlf_len;       /* len of dquot buffer */
			
 
				+	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
			
 
				+} xfs_dq_logformat_t;
			
 
				+
			
 
				+/*
			
 
				+ * log format struct for QUOTAOFF records.
			
 
				+ * The first two fields must be the type and size fitting into
			
 
				+ * 32 bits : log_recovery code assumes that.
			
 
				+ * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
			
 
				+ * to the first and ensures that the first logitem is taken out of the AIL
			
 
				+ * only when the last one is securely committed.
			
 
				+ */
			
 
				+typedef struct xfs_qoff_logformat {
			
 
				+	unsigned short		qf_type;	/* quotaoff log item type */
			
 
				+	unsigned short		qf_size;	/* size of this item */
			
 
				+	unsigned int		qf_flags;	/* USR and/or GRP */
			
 
				+	char			qf_pad[12];	/* padding for future */
			
 
				+} xfs_qoff_logformat_t;
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
			
 
				+ */
			
 
				+#define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
			
 
				+#define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
			
 
				+#define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
			
 
				+#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
			
 
				+#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
			
 
				+#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
			
 
				+#define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
			
 
				+
			
 
				+/*
			
 
				+ * Conversion to and from the combined OQUOTA flag (if necessary)
			
 
				+ * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk()
			
 
				+ */
			
 
				+#define XFS_GQUOTA_ENFD	0x0080  /* group quota limits enforced */
			
 
				+#define XFS_GQUOTA_CHKD	0x0100  /* quotacheck run on group quotas */
			
 
				+#define XFS_PQUOTA_ENFD	0x0200  /* project quota limits enforced */
			
 
				+#define XFS_PQUOTA_CHKD	0x0400  /* quotacheck run on project quotas */
			
 
				+
			
 
				+#define XFS_ALL_QUOTA_ACCT	\
			
 
				+		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
			
 
				+#define XFS_ALL_QUOTA_ENFD	\
			
 
				+		(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD)
			
 
				+#define XFS_ALL_QUOTA_CHKD	\
			
 
				+		(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)
			
 
				+
			
 
				+#define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
			
 
				+				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
			
 
				+				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\
			
 
				+				 XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\
			
 
				+				 XFS_PQUOTA_CHKD)
			
 
				+
			
 
				+/*
			
 
				+ * Inode create log item structure
			
 
				+ *
			
 
				+ * Log recovery assumes the first two entries are the type and size and they fit
			
 
				+ * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
			
 
				+ * decoding can be done correctly.
			
 
				+ */
			
 
				+struct xfs_icreate_log {
			
 
				+	__uint16_t	icl_type;	/* type of log format structure */
			
 
				+	__uint16_t	icl_size;	/* size of log format structure */
			
 
				+	__be32		icl_ag;		/* ag being allocated in */
			
 
				+	__be32		icl_agbno;	/* start block of inode range */
			
 
				+	__be32		icl_count;	/* number of inodes to initialise */
			
 
				+	__be32		icl_isize;	/* size of inodes */
			
 
				+	__be32		icl_length;	/* length of extent to initialise */
			
 
				+	__be32		icl_gen;	/* inode generation number to use */
			
 
				+};
			
 
				+
			
 
				+int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
			
 
				+int	xfs_log_calc_minimum_size(struct xfs_mount *);
			
 
				+
			
 
				+
			
 
				+#endif /* __XFS_LOG_FORMAT_H__ */
			
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -24,51 +24,13 @@ struct xlog_ticket;
 
				 struct xfs_mount;
			
 
				 
			
 
				 /*
			
 
				- * Macros, structures, prototypes for internal log manager use.
			
 
				+ * Flags for log structure
			
 
				  */
			
 
				-
			
 
				-#define XLOG_MIN_ICLOGS		2
			
 
				-#define XLOG_MAX_ICLOGS		8
			
 
				-#define XLOG_HEADER_MAGIC_NUM	0xFEEDbabe	/* Invalid cycle number */
			
 
				-#define XLOG_VERSION_1		1
			
 
				-#define XLOG_VERSION_2		2		/* Large IClogs, Log sunit */
			
 
				-#define XLOG_VERSION_OKBITS	(XLOG_VERSION_1 | XLOG_VERSION_2)
			
 
				-#define XLOG_MIN_RECORD_BSIZE	(16*1024)	/* eventually 32k */
			
 
				-#define XLOG_BIG_RECORD_BSIZE	(32*1024)	/* 32k buffers */
			
 
				-#define XLOG_MAX_RECORD_BSIZE	(256*1024)
			
 
				-#define XLOG_HEADER_CYCLE_SIZE	(32*1024)	/* cycle data in header */
			
 
				-#define XLOG_MIN_RECORD_BSHIFT	14		/* 16384 == 1 << 14 */
			
 
				-#define XLOG_BIG_RECORD_BSHIFT	15		/* 32k == 1 << 15 */
			
 
				-#define XLOG_MAX_RECORD_BSHIFT	18		/* 256k == 1 << 18 */
			
 
				-#define XLOG_BTOLSUNIT(log, b)  (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \
			
 
				-                                 (log)->l_mp->m_sb.sb_logsunit)
			
 
				-#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit)
			
 
				-
			
 
				-#define XLOG_HEADER_SIZE	512
			
 
				-
			
 
				-#define XLOG_REC_SHIFT(log) \
			
 
				-	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
			
 
				-	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
			
 
				-#define XLOG_TOTAL_REC_SHIFT(log) \
			
 
				-	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
			
 
				-	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
			
 
				-
			
 
				-static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
			
 
				-{
			
 
				-	return ((xfs_lsn_t)cycle << 32) | block;
			
 
				-}
			
 
				-
			
 
				-static inline uint xlog_get_cycle(char *ptr)
			
 
				-{
			
 
				-	if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
			
 
				-		return be32_to_cpu(*((__be32 *)ptr + 1));
			
 
				-	else
			
 
				-		return be32_to_cpu(*(__be32 *)ptr);
			
 
				-}
			
 
				-
			
 
				-#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				+#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
			
 
				+#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
			
 
				+#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
			
 
				+					   shutdown */
			
 
				+#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
			
 
				 
			
 
				 /*
			
 
				  * get client id from packed copy.
			
@@ -101,27 +63,7 @@ static inline uint xlog_get_client_id(__be32 i)
 
				 #define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
			
 
				 #define XLOG_STATE_ALL	     0x7FFF /* All possible valid flags */
			
 
				 #define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
			
 
				-#endif	/* __KERNEL__ */
			
 
				 
			
 
				-/*
			
 
				- * Flags to log operation header
			
 
				- *
			
 
				- * The first write of a new transaction will be preceded with a start
			
 
				- * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
			
 
				- * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
			
 
				- * the remainder of the current active in-core log, it is split up into
			
 
				- * multiple regions.  Each partial region will be marked with a
			
 
				- * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
			
 
				- *
			
 
				- */
			
 
				-#define XLOG_START_TRANS	0x01	/* Start a new transaction */
			
 
				-#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
			
 
				-#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
			
 
				-#define XLOG_WAS_CONT_TRANS	0x08	/* Cont this trans into new region */
			
 
				-#define XLOG_END_TRANS		0x10	/* End a continued transaction */
			
 
				-#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				 /*
			
 
				  * Flags to log ticket
			
 
				  */
			
@@ -132,22 +74,6 @@ static inline uint xlog_get_client_id(__be32 i)
 
				 	{ XLOG_TIC_INITED,	"XLOG_TIC_INITED" }, \
			
 
				 	{ XLOG_TIC_PERM_RESERV,	"XLOG_TIC_PERM_RESERV" }
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				-#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
			
 
				-
			
 
				-/*
			
 
				- * Flags for log structure
			
 
				- */
			
 
				-#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
			
 
				-#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
			
 
				-#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
			
 
				-					   shutdown */
			
 
				-#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
			
 
				-
			
 
				-typedef __uint32_t xlog_tid_t;
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				 /*
			
 
				  * Below are states for covering allocation transactions.
			
 
				  * By covering, we mean changing the h_tail_lsn in the last on-disk
			
@@ -223,7 +149,6 @@ typedef __uint32_t xlog_tid_t;
 
				 
			
 
				 #define XLOG_COVER_OPS		5
			
 
				 
			
 
				-
			
 
				 /* Ticket reservation region accounting */ 
			
 
				 #define XLOG_TIC_LEN_MAX	15
			
 
				 
			
@@ -258,64 +183,6 @@ typedef struct xlog_ticket {
 
				 	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
			
 
				 } xlog_ticket_t;
			
 
				 
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-typedef struct xlog_op_header {
			
 
				-	__be32	   oh_tid;	/* transaction id of operation	:  4 b */
			
 
				-	__be32	   oh_len;	/* bytes in data region		:  4 b */
			
 
				-	__u8	   oh_clientid;	/* who sent me this		:  1 b */
			
 
				-	__u8	   oh_flags;	/*				:  1 b */
			
 
				-	__u16	   oh_res2;	/* 32 bit align			:  2 b */
			
 
				-} xlog_op_header_t;
			
 
				-
			
 
				-
			
 
				-/* valid values for h_fmt */
			
 
				-#define XLOG_FMT_UNKNOWN  0
			
 
				-#define XLOG_FMT_LINUX_LE 1
			
 
				-#define XLOG_FMT_LINUX_BE 2
			
 
				-#define XLOG_FMT_IRIX_BE  3
			
 
				-
			
 
				-/* our fmt */
			
 
				-#ifdef XFS_NATIVE_HOST
			
 
				-#define XLOG_FMT XLOG_FMT_LINUX_BE
			
 
				-#else
			
 
				-#define XLOG_FMT XLOG_FMT_LINUX_LE
			
 
				-#endif
			
 
				-
			
 
				-typedef struct xlog_rec_header {
			
 
				-	__be32	  h_magicno;	/* log record (LR) identifier		:  4 */
			
 
				-	__be32	  h_cycle;	/* write cycle of log			:  4 */
			
 
				-	__be32	  h_version;	/* LR version				:  4 */
			
 
				-	__be32	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
			
 
				-	__be64	  h_lsn;	/* lsn of this LR			:  8 */
			
 
				-	__be64	  h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
			
 
				-	__le32	  h_crc;	/* crc of log record                    :  4 */
			
 
				-	__be32	  h_prev_block; /* block number to previous LR		:  4 */
			
 
				-	__be32	  h_num_logops;	/* number of log operations in this LR	:  4 */
			
 
				-	__be32	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
			
 
				-	/* new fields */
			
 
				-	__be32    h_fmt;        /* format of log record                 :  4 */
			
 
				-	uuid_t	  h_fs_uuid;    /* uuid of FS                           : 16 */
			
 
				-	__be32	  h_size;	/* iclog size				:  4 */
			
 
				-} xlog_rec_header_t;
			
 
				-
			
 
				-typedef struct xlog_rec_ext_header {
			
 
				-	__be32	  xh_cycle;	/* write cycle of log			: 4 */
			
 
				-	__be32	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
			
 
				-} xlog_rec_ext_header_t;
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				-
			
 
				-/*
			
 
				- * Quite misnamed, because this union lays out the actual on-disk log buffer.
			
 
				- */
			
 
				-typedef union xlog_in_core2 {
			
 
				-	xlog_rec_header_t	hic_header;
			
 
				-	xlog_rec_ext_header_t	hic_xheader;
			
 
				-	char			hic_sector[XLOG_HEADER_SIZE];
			
 
				-} xlog_in_core_2_t;
			
 
				-
			
 
				 /*
			
 
				  * - A log record header is 512 bytes.  There is plenty of room to grow the
			
 
				  *	xlog_rec_header_t into the reserved space.
			
@@ -411,14 +278,17 @@ struct xfs_cil {
 
				 	struct xlog		*xc_log;
			
 
				 	struct list_head	xc_cil;
			
 
				 	spinlock_t		xc_cil_lock;
			
 
				+
			
 
				+	struct rw_semaphore	xc_ctx_lock ____cacheline_aligned_in_smp;
			
 
				 	struct xfs_cil_ctx	*xc_ctx;
			
 
				-	struct rw_semaphore	xc_ctx_lock;
			
 
				+
			
 
				+	spinlock_t		xc_push_lock ____cacheline_aligned_in_smp;
			
 
				+	xfs_lsn_t		xc_push_seq;
			
 
				 	struct list_head	xc_committing;
			
 
				 	wait_queue_head_t	xc_commit_wait;
			
 
				 	xfs_lsn_t		xc_current_sequence;
			
 
				 	struct work_struct	xc_push_work;
			
 
				-	xfs_lsn_t		xc_push_seq;
			
 
				-};
			
 
				+} ____cacheline_aligned_in_smp;
			
 
				 
			
 
				 /*
			
 
				  * The amount of log space we allow the CIL to aggregate is difficult to size.
			
@@ -686,6 +556,5 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
 
				 	schedule();
			
 
				 	remove_wait_queue(wq, &wait);
			
 
				 }
			
 
				-#endif	/* __KERNEL__ */
			
 
				 
			
 
				 #endif	/* __XFS_LOG_PRIV_H__ */
			
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -17,7 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_inum.h"
			
@@ -41,7 +41,6 @@
 
				 #include "xfs_extfree_item.h"
			
 
				 #include "xfs_trans_priv.h"
			
 
				 #include "xfs_quota.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_cksum.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
@@ -51,10 +50,12 @@
 
				 #include "xfs_symlink.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				-#include "xfs_dir2_priv.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_attr_leaf.h"
			
 
				 #include "xfs_attr_remote.h"
			
 
				 
			
 
				+#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
			
 
				+
			
 
				 STATIC int
			
 
				 xlog_find_zeroed(
			
 
				 	struct xlog	*,
			
@@ -607,7 +608,7 @@ out:
 
				 
			
 
				 /*
			
 
				  * Head is defined to be the point of the log where the next log write
			
 
				- * write could go.  This means that incomplete LR writes at the end are
			
 
				+ * could go.  This means that incomplete LR writes at the end are
			
 
				  * eliminated when calculating the head.  We aren't guaranteed that previous
			
 
				  * LR have complete transactions.  We only know that a cycle number of
			
 
				  * current cycle number -1 won't be present in the log if we start writing
			
@@ -963,6 +964,7 @@ xlog_find_tail(
 
				 	}
			
 
				 	if (!found) {
			
 
				 		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
			
 
				+		xlog_put_bp(bp);
			
 
				 		ASSERT(0);
			
 
				 		return XFS_ERROR(EIO);
			
 
				 	}
			
@@ -1144,7 +1146,8 @@ xlog_find_zeroed(
 
				 		 */
			
 
				 		xfs_warn(log->l_mp,
			
 
				 			"Log inconsistent or not a log (last==0, first!=1)");
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				+		error = XFS_ERROR(EINVAL);
			
 
				+		goto bp_err;
			
 
				 	}
			
 
				 
			
 
				 	/* we have a partially zeroed log */
			
@@ -1766,19 +1769,11 @@ xlog_recover_buffer_pass1(
 
				 
			
 
				 /*
			
 
				  * Check to see whether the buffer being recovered has a corresponding
			
 
				- * entry in the buffer cancel record table.  If it does then return 1
			
 
				- * so that it will be cancelled, otherwise return 0.  If the buffer is
			
 
				- * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement
			
 
				- * the refcount on the entry in the table and remove it from the table
			
 
				- * if this is the last reference.
			
 
				- *
			
 
				- * We remove the cancel record from the table when we encounter its
			
 
				- * last occurrence in the log so that if the same buffer is re-used
			
 
				- * again after its last cancellation we actually replay the changes
			
 
				- * made at that point.
			
 
				+ * entry in the buffer cancel record table. If it does, return the cancel
			
 
				+ * buffer structure to the caller.
			
 
				  */
			
 
				-STATIC int
			
 
				-xlog_check_buffer_cancelled(
			
 
				+STATIC struct xfs_buf_cancel *
			
 
				+xlog_peek_buffer_cancelled(
			
 
				 	struct xlog		*log,
			
 
				 	xfs_daddr_t		blkno,
			
 
				 	uint			len,
			
@@ -1787,22 +1782,16 @@ xlog_check_buffer_cancelled(
 
				 	struct list_head	*bucket;
			
 
				 	struct xfs_buf_cancel	*bcp;
			
 
				 
			
 
				-	if (log->l_buf_cancel_table == NULL) {
			
 
				-		/*
			
 
				-		 * There is nothing in the table built in pass one,
			
 
				-		 * so this buffer must not be cancelled.
			
 
				-		 */
			
 
				+	if (!log->l_buf_cancel_table) {
			
 
				+		/* empty table means no cancelled buffers in the log */
			
 
				 		ASSERT(!(flags & XFS_BLF_CANCEL));
			
 
				-		return 0;
			
 
				+		return NULL;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * Search for an entry in the  cancel table that matches our buffer.
			
 
				-	 */
			
 
				 	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
			
 
				 	list_for_each_entry(bcp, bucket, bc_list) {
			
 
				 		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
			
 
				-			goto found;
			
 
				+			return bcp;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1810,9 +1799,32 @@ xlog_check_buffer_cancelled(
 
				 	 * that the buffer is NOT cancelled.
			
 
				 	 */
			
 
				 	ASSERT(!(flags & XFS_BLF_CANCEL));
			
 
				-	return 0;
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * If the buffer is being cancelled then return 1 so that it will be cancelled,
			
 
				+ * otherwise return 0.  If the buffer is actually a buffer cancel item
			
 
				+ * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the
			
 
				+ * table and remove it from the table if this is the last reference.
			
 
				+ *
			
 
				+ * We remove the cancel record from the table when we encounter its last
			
 
				+ * occurrence in the log so that if the same buffer is re-used again after its
			
 
				+ * last cancellation we actually replay the changes made at that point.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xlog_check_buffer_cancelled(
			
 
				+	struct xlog		*log,
			
 
				+	xfs_daddr_t		blkno,
			
 
				+	uint			len,
			
 
				+	ushort			flags)
			
 
				+{
			
 
				+	struct xfs_buf_cancel	*bcp;
			
 
				+
			
 
				+	bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags);
			
 
				+	if (!bcp)
			
 
				+		return 0;
			
 
				 
			
 
				-found:
			
 
				 	/*
			
 
				 	 * We've go a match, so return 1 so that the recovery of this buffer
			
 
				 	 * is cancelled.  If this buffer is actually a buffer cancel log
			
@@ -1946,6 +1958,104 @@ xlog_recover_do_inode_buffer(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * V5 filesystems know the age of the buffer on disk being recovered. We can
			
 
				+ * have newer objects on disk than we are replaying, and so for these cases we
			
 
				+ * don't want to replay the current change as that will make the buffer contents
			
 
				+ * temporarily invalid on disk.
			
 
				+ *
			
 
				+ * The magic number might not match the buffer type we are going to recover
			
 
				+ * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
			
 
				+ * extract the LSN of the existing object in the buffer based on its current
			
 
				+ * magic number.  If we don't recognise the magic number in the buffer, then
			
 
				+ * return a LSN of -1 so that the caller knows it was an unrecognised block and
			
 
				+ * so can recover the buffer.
			
 
				+ */
			
 
				+static xfs_lsn_t
			
 
				+xlog_recover_get_buf_lsn(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	__uint32_t		magic32;
			
 
				+	__uint16_t		magic16;
			
 
				+	__uint16_t		magicda;
			
 
				+	void			*blk = bp->b_addr;
			
 
				+
			
 
				+	/* v4 filesystems always recover immediately */
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		goto recover_immediately;
			
 
				+
			
 
				+	magic32 = be32_to_cpu(*(__be32 *)blk);
			
 
				+	switch (magic32) {
			
 
				+	case XFS_ABTB_CRC_MAGIC:
			
 
				+	case XFS_ABTC_CRC_MAGIC:
			
 
				+	case XFS_ABTB_MAGIC:
			
 
				+	case XFS_ABTC_MAGIC:
			
 
				+	case XFS_IBT_CRC_MAGIC:
			
 
				+	case XFS_IBT_MAGIC:
			
 
				+		return be64_to_cpu(
			
 
				+				((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn);
			
 
				+	case XFS_BMAP_CRC_MAGIC:
			
 
				+	case XFS_BMAP_MAGIC:
			
 
				+		return be64_to_cpu(
			
 
				+				((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn);
			
 
				+	case XFS_AGF_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
			
 
				+	case XFS_AGFL_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
			
 
				+	case XFS_AGI_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
			
 
				+	case XFS_SYMLINK_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
			
 
				+	case XFS_DIR3_BLOCK_MAGIC:
			
 
				+	case XFS_DIR3_DATA_MAGIC:
			
 
				+	case XFS_DIR3_FREE_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
			
 
				+	case XFS_ATTR3_RMT_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
			
 
				+	case XFS_SB_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
			
 
				+	switch (magicda) {
			
 
				+	case XFS_DIR3_LEAF1_MAGIC:
			
 
				+	case XFS_DIR3_LEAFN_MAGIC:
			
 
				+	case XFS_DA3_NODE_MAGIC:
			
 
				+		return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * We do individual object checks on dquot and inode buffers as they
			
 
				+	 * have their own individual LSN records. Also, we could have a stale
			
 
				+	 * buffer here, so we have to at least recognise these buffer types.
			
 
				+	 *
			
 
				+	 * A noted complexity here is inode unlinked list processing - it logs
			
 
				+	 * the inode directly in the buffer, but we don't know which inodes have
			
 
				+	 * been modified, and there is no global buffer LSN. Hence we need to
			
 
				+	 * recover all inode buffer types immediately. This problem will be
			
 
				+	 * fixed by logical logging of the unlinked list modifications.
			
 
				+	 */
			
 
				+	magic16 = be16_to_cpu(*(__be16 *)blk);
			
 
				+	switch (magic16) {
			
 
				+	case XFS_DQUOT_MAGIC:
			
 
				+	case XFS_DINODE_MAGIC:
			
 
				+		goto recover_immediately;
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	/* unknown buffer contents, recover immediately */
			
 
				+
			
 
				+recover_immediately:
			
 
				+	return (xfs_lsn_t)-1;
			
 
				+
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Validate the recovered buffer is of the correct type and attach the
			
 
				  * appropriate buffer operations to them for writeback. Magic numbers are in a
			
@@ -1955,7 +2065,7 @@ xlog_recover_do_inode_buffer(
 
				  *	inside a struct xfs_da_blkinfo at the start of the buffer.
			
 
				  */
			
 
				 static void
			
 
				-xlog_recovery_validate_buf_type(
			
 
				+xlog_recover_validate_buf_type(
			
 
				 	struct xfs_mount	*mp,
			
 
				 	struct xfs_buf		*bp,
			
 
				 	xfs_buf_log_format_t	*buf_f)
			
@@ -2234,7 +2344,7 @@ xlog_recover_do_reg_buffer(
 
				 	 * just avoid the verification stage for non-crc filesystems
			
 
				 	 */
			
 
				 	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		xlog_recovery_validate_buf_type(mp, bp, buf_f);
			
 
				+		xlog_recover_validate_buf_type(mp, bp, buf_f);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2366,7 +2476,7 @@ xfs_qm_dqcheck(
 
				 
			
 
				 /*
			
 
				  * Perform a dquot buffer recovery.
			
 
				- * Simple algorithm: if we have found a QUOTAOFF logitem of the same type
			
 
				+ * Simple algorithm: if we have found a QUOTAOFF log item of the same type
			
 
				  * (ie. USR or GRP), then just toss this buffer away; don't recover it.
			
 
				  * Else, treat it as a regular buffer and do recovery.
			
 
				  */
			
@@ -2425,20 +2535,22 @@ xlog_recover_do_dquot_buffer(
 
				  * over the log during recovery.  During the first we build a table of
			
 
				  * those buffers which have been cancelled, and during the second we
			
 
				  * only replay those buffers which do not have corresponding cancel
			
 
				- * records in the table.  See xlog_recover_do_buffer_pass[1,2] above
			
 
				+ * records in the table.  See xlog_recover_buffer_pass[1,2] above
			
 
				  * for more details on the implementation of the table of cancel records.
			
 
				  */
			
 
				 STATIC int
			
 
				 xlog_recover_buffer_pass2(
			
 
				 	struct xlog			*log,
			
 
				 	struct list_head		*buffer_list,
			
 
				-	struct xlog_recover_item	*item)
			
 
				+	struct xlog_recover_item	*item,
			
 
				+	xfs_lsn_t			current_lsn)
			
 
				 {
			
 
				 	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
			
 
				 	xfs_mount_t		*mp = log->l_mp;
			
 
				 	xfs_buf_t		*bp;
			
 
				 	int			error;
			
 
				 	uint			buf_flags;
			
 
				+	xfs_lsn_t		lsn;
			
 
				 
			
 
				 	/*
			
 
				 	 * In this pass we only want to recover all the buffers which have
			
@@ -2463,10 +2575,17 @@ xlog_recover_buffer_pass2(
 
				 	error = bp->b_error;
			
 
				 	if (error) {
			
 
				 		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
			
 
				-		xfs_buf_relse(bp);
			
 
				-		return error;
			
 
				+		goto out_release;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * recover the buffer only if we get an LSN from it and it's less than
			
 
				+	 * the lsn of the transaction we are replaying.
			
 
				+	 */
			
 
				+	lsn = xlog_recover_get_buf_lsn(mp, bp);
			
 
				+	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
			
 
				+		goto out_release;
			
 
				+
			
 
				 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
			
 
				 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
			
 
				 	} else if (buf_f->blf_flags &
			
@@ -2476,7 +2595,7 @@ xlog_recover_buffer_pass2(
 
				 		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
			
 
				 	}
			
 
				 	if (error)
			
 
				-		return XFS_ERROR(error);
			
 
				+		goto out_release;
			
 
				 
			
 
				 	/*
			
 
				 	 * Perform delayed write on the buffer.  Asynchronous writes will be
			
@@ -2505,6 +2624,7 @@ xlog_recover_buffer_pass2(
 
				 		xfs_buf_delwri_queue(bp, buffer_list);
			
 
				 	}
			
 
				 
			
 
				+out_release:
			
 
				 	xfs_buf_relse(bp);
			
 
				 	return error;
			
 
				 }
			
@@ -2513,7 +2633,8 @@ STATIC int
 
				 xlog_recover_inode_pass2(
			
 
				 	struct xlog			*log,
			
 
				 	struct list_head		*buffer_list,
			
 
				-	struct xlog_recover_item	*item)
			
 
				+	struct xlog_recover_item	*item,
			
 
				+	xfs_lsn_t			current_lsn)
			
 
				 {
			
 
				 	xfs_inode_log_format_t	*in_f;
			
 
				 	xfs_mount_t		*mp = log->l_mp;
			
@@ -2592,6 +2713,20 @@ xlog_recover_inode_pass2(
 
				 		goto error;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * If the inode has an LSN in it, recover the inode only if it's less
			
 
				+	 * than the lsn of the transaction we are replaying.
			
 
				+	 */
			
 
				+	if (dip->di_version >= 3) {
			
 
				+		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
			
 
				+
			
 
				+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
			
 
				+			trace_xfs_log_recover_inode_skip(log, in_f);
			
 
				+			error = 0;
			
 
				+			goto out_release;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
			
 
				 	 * are transactional and if ordering is necessary we can determine that
			
@@ -2781,6 +2916,8 @@ write_inode_buffer:
 
				 	ASSERT(bp->b_target->bt_mount == mp);
			
 
				 	bp->b_iodone = xlog_recover_iodone;
			
 
				 	xfs_buf_delwri_queue(bp, buffer_list);
			
 
				+
			
 
				+out_release:
			
 
				 	xfs_buf_relse(bp);
			
 
				 error:
			
 
				 	if (need_free)
			
@@ -2822,7 +2959,8 @@ STATIC int
 
				 xlog_recover_dquot_pass2(
			
 
				 	struct xlog			*log,
			
 
				 	struct list_head		*buffer_list,
			
 
				-	struct xlog_recover_item	*item)
			
 
				+	struct xlog_recover_item	*item,
			
 
				+	xfs_lsn_t			current_lsn)
			
 
				 {
			
 
				 	xfs_mount_t		*mp = log->l_mp;
			
 
				 	xfs_buf_t		*bp;
			
@@ -2896,6 +3034,19 @@ xlog_recover_dquot_pass2(
 
				 		return XFS_ERROR(EIO);
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * If the dquot has an LSN in it, recover the dquot only if it's less
			
 
				+	 * than the lsn of the transaction we are replaying.
			
 
				+	 */
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
			
 
				+		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
			
 
				+
			
 
				+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
			
 
				+			goto out_release;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
			
 
				 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				 		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
			
@@ -2906,9 +3057,10 @@ xlog_recover_dquot_pass2(
 
				 	ASSERT(bp->b_target->bt_mount == mp);
			
 
				 	bp->b_iodone = xlog_recover_iodone;
			
 
				 	xfs_buf_delwri_queue(bp, buffer_list);
			
 
				-	xfs_buf_relse(bp);
			
 
				 
			
 
				-	return (0);
			
 
				+out_release:
			
 
				+	xfs_buf_relse(bp);
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -3116,6 +3268,106 @@ xlog_recover_free_trans(
 
				 	kmem_free(trans);
			
 
				 }
			
 
				 
			
 
				+STATIC void
			
 
				+xlog_recover_buffer_ra_pass2(
			
 
				+	struct xlog                     *log,
			
 
				+	struct xlog_recover_item        *item)
			
 
				+{
			
 
				+	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
			
 
				+	struct xfs_mount		*mp = log->l_mp;
			
 
				+
			
 
				+	if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno,
			
 
				+			buf_f->blf_len, buf_f->blf_flags)) {
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
			
 
				+				buf_f->blf_len, NULL);
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xlog_recover_inode_ra_pass2(
			
 
				+	struct xlog                     *log,
			
 
				+	struct xlog_recover_item        *item)
			
 
				+{
			
 
				+	struct xfs_inode_log_format	ilf_buf;
			
 
				+	struct xfs_inode_log_format	*ilfp;
			
 
				+	struct xfs_mount		*mp = log->l_mp;
			
 
				+	int			error;
			
 
				+
			
 
				+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
			
 
				+		ilfp = item->ri_buf[0].i_addr;
			
 
				+	} else {
			
 
				+		ilfp = &ilf_buf;
			
 
				+		memset(ilfp, 0, sizeof(*ilfp));
			
 
				+		error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
			
 
				+		if (error)
			
 
				+			return;
			
 
				+	}
			
 
				+
			
 
				+	if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0))
			
 
				+		return;
			
 
				+
			
 
				+	xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
			
 
				+				ilfp->ilf_len, &xfs_inode_buf_ra_ops);
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xlog_recover_dquot_ra_pass2(
			
 
				+	struct xlog			*log,
			
 
				+	struct xlog_recover_item	*item)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = log->l_mp;
			
 
				+	struct xfs_disk_dquot	*recddq;
			
 
				+	struct xfs_dq_logformat	*dq_f;
			
 
				+	uint			type;
			
 
				+
			
 
				+
			
 
				+	if (mp->m_qflags == 0)
			
 
				+		return;
			
 
				+
			
 
				+	recddq = item->ri_buf[1].i_addr;
			
 
				+	if (recddq == NULL)
			
 
				+		return;
			
 
				+	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
			
 
				+		return;
			
 
				+
			
 
				+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
			
 
				+	ASSERT(type);
			
 
				+	if (log->l_quotaoffs_flag & type)
			
 
				+		return;
			
 
				+
			
 
				+	dq_f = item->ri_buf[0].i_addr;
			
 
				+	ASSERT(dq_f);
			
 
				+	ASSERT(dq_f->qlf_len == 1);
			
 
				+
			
 
				+	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
			
 
				+			  XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xlog_recover_ra_pass2(
			
 
				+	struct xlog			*log,
			
 
				+	struct xlog_recover_item	*item)
			
 
				+{
			
 
				+	switch (ITEM_TYPE(item)) {
			
 
				+	case XFS_LI_BUF:
			
 
				+		xlog_recover_buffer_ra_pass2(log, item);
			
 
				+		break;
			
 
				+	case XFS_LI_INODE:
			
 
				+		xlog_recover_inode_ra_pass2(log, item);
			
 
				+		break;
			
 
				+	case XFS_LI_DQUOT:
			
 
				+		xlog_recover_dquot_ra_pass2(log, item);
			
 
				+		break;
			
 
				+	case XFS_LI_EFI:
			
 
				+	case XFS_LI_EFD:
			
 
				+	case XFS_LI_QUOTAOFF:
			
 
				+	default:
			
 
				+		break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 STATIC int
			
 
				 xlog_recover_commit_pass1(
			
 
				 	struct xlog			*log,
			
@@ -3155,15 +3407,18 @@ xlog_recover_commit_pass2(
 
				 
			
 
				 	switch (ITEM_TYPE(item)) {
			
 
				 	case XFS_LI_BUF:
			
 
				-		return xlog_recover_buffer_pass2(log, buffer_list, item);
			
 
				+		return xlog_recover_buffer_pass2(log, buffer_list, item,
			
 
				+						 trans->r_lsn);
			
 
				 	case XFS_LI_INODE:
			
 
				-		return xlog_recover_inode_pass2(log, buffer_list, item);
			
 
				+		return xlog_recover_inode_pass2(log, buffer_list, item,
			
 
				+						 trans->r_lsn);
			
 
				 	case XFS_LI_EFI:
			
 
				 		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
			
 
				 	case XFS_LI_EFD:
			
 
				 		return xlog_recover_efd_pass2(log, item);
			
 
				 	case XFS_LI_DQUOT:
			
 
				-		return xlog_recover_dquot_pass2(log, buffer_list, item);
			
 
				+		return xlog_recover_dquot_pass2(log, buffer_list, item,
			
 
				+						trans->r_lsn);
			
 
				 	case XFS_LI_ICREATE:
			
 
				 		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
			
 
				 	case XFS_LI_QUOTAOFF:
			
@@ -3177,6 +3432,26 @@ xlog_recover_commit_pass2(
 
				 	}
			
 
				 }
			
 
				 
			
 
				+STATIC int
			
 
				+xlog_recover_items_pass2(
			
 
				+	struct xlog                     *log,
			
 
				+	struct xlog_recover             *trans,
			
 
				+	struct list_head                *buffer_list,
			
 
				+	struct list_head                *item_list)
			
 
				+{
			
 
				+	struct xlog_recover_item	*item;
			
 
				+	int				error = 0;
			
 
				+
			
 
				+	list_for_each_entry(item, item_list, ri_list) {
			
 
				+		error = xlog_recover_commit_pass2(log, trans,
			
 
				+					  buffer_list, item);
			
 
				+		if (error)
			
 
				+			return error;
			
 
				+	}
			
 
				+
			
 
				+	return error;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Perform the transaction.
			
 
				  *
			
@@ -3189,9 +3464,16 @@ xlog_recover_commit_trans(
 
				 	struct xlog_recover	*trans,
			
 
				 	int			pass)
			
 
				 {
			
 
				-	int			error = 0, error2;
			
 
				-	xlog_recover_item_t	*item;
			
 
				-	LIST_HEAD		(buffer_list);
			
 
				+	int				error = 0;
			
 
				+	int				error2;
			
 
				+	int				items_queued = 0;
			
 
				+	struct xlog_recover_item	*item;
			
 
				+	struct xlog_recover_item	*next;
			
 
				+	LIST_HEAD			(buffer_list);
			
 
				+	LIST_HEAD			(ra_list);
			
 
				+	LIST_HEAD			(done_list);
			
 
				+
			
 
				+	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
			
 
				 
			
 
				 	hlist_del(&trans->r_list);
			
 
				 
			
@@ -3199,14 +3481,22 @@ xlog_recover_commit_trans(
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	list_for_each_entry(item, &trans->r_itemq, ri_list) {
			
 
				+	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
			
 
				 		switch (pass) {
			
 
				 		case XLOG_RECOVER_PASS1:
			
 
				 			error = xlog_recover_commit_pass1(log, trans, item);
			
 
				 			break;
			
 
				 		case XLOG_RECOVER_PASS2:
			
 
				-			error = xlog_recover_commit_pass2(log, trans,
			
 
				-							  &buffer_list, item);
			
 
				+			xlog_recover_ra_pass2(log, item);
			
 
				+			list_move_tail(&item->ri_list, &ra_list);
			
 
				+			items_queued++;
			
 
				+			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
			
 
				+				error = xlog_recover_items_pass2(log, trans,
			
 
				+						&buffer_list, &ra_list);
			
 
				+				list_splice_tail_init(&ra_list, &done_list);
			
 
				+				items_queued = 0;
			
 
				+			}
			
 
				+
			
 
				 			break;
			
 
				 		default:
			
 
				 			ASSERT(0);
			
@@ -3216,9 +3506,19 @@ xlog_recover_commit_trans(
 
				 			goto out;
			
 
				 	}
			
 
				 
			
 
				+out:
			
 
				+	if (!list_empty(&ra_list)) {
			
 
				+		if (!error)
			
 
				+			error = xlog_recover_items_pass2(log, trans,
			
 
				+					&buffer_list, &ra_list);
			
 
				+		list_splice_tail_init(&ra_list, &done_list);
			
 
				+	}
			
 
				+
			
 
				+	if (!list_empty(&done_list))
			
 
				+		list_splice_init(&done_list, &trans->r_itemq);
			
 
				+
			
 
				 	xlog_recover_free_trans(trans);
			
 
				 
			
 
				-out:
			
 
				 	error2 = xfs_buf_delwri_submit(&buffer_list);
			
 
				 	return error ? error : error2;
			
 
				 }
			
@@ -3376,7 +3676,7 @@ xlog_recover_process_efi(
 
				 	}
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, 0);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
			
 
				 	if (error)
			
 
				 		goto abort_error;
			
 
				 	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
			
@@ -3482,8 +3782,7 @@ xlog_recover_clear_agi_bucket(
 
				 	int		error;
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
			
 
				-				  0, 0, 0);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0);
			
 
				 	if (error)
			
 
				 		goto out_abort;
			
 
				 
			
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/xfs_log_rlimit.c
@@ -0,0 +1,147 @@
 
				+/*
			
 
				+ * Copyright (c) 2013 Jie Liu.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_trans_space.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_attr_leaf.h"
			
 
				+
			
 
				+/*
			
 
				+ * Calculate the maximum length in bytes that would be required for a local
			
 
				+ * attribute value as large attributes out of line are not logged.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xfs_log_calc_max_attrsetm_res(
			
 
				+	struct xfs_mount	*mp)
			
 
				+{
			
 
				+	int			size;
			
 
				+	int			nblks;
			
 
				+
			
 
				+	size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) -
			
 
				+	       MAXNAMELEN - 1;
			
 
				+	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
			
 
				+	nblks += XFS_B_TO_FSB(mp, size);
			
 
				+	nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
			
 
				+
			
 
				+	return  M_RES(mp)->tr_attrsetm.tr_logres +
			
 
				+		M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Iterate over the log space reservation table to figure out and return
			
 
				+ * the maximum one in terms of the pre-calculated values which were done
			
 
				+ * at mount time.
			
 
				+ */
			
 
				+STATIC void
			
 
				+xfs_log_get_max_trans_res(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_trans_res	*max_resp)
			
 
				+{
			
 
				+	struct xfs_trans_res	*resp;
			
 
				+	struct xfs_trans_res	*end_resp;
			
 
				+	int			log_space = 0;
			
 
				+	int			attr_space;
			
 
				+
			
 
				+	attr_space = xfs_log_calc_max_attrsetm_res(mp);
			
 
				+
			
 
				+	resp = (struct xfs_trans_res *)M_RES(mp);
			
 
				+	end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
			
 
				+	for (; resp < end_resp; resp++) {
			
 
				+		int		tmp = resp->tr_logcount > 1 ?
			
 
				+				      resp->tr_logres * resp->tr_logcount :
			
 
				+				      resp->tr_logres;
			
 
				+		if (log_space < tmp) {
			
 
				+			log_space = tmp;
			
 
				+			*max_resp = *resp;		/* struct copy */
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (attr_space > log_space) {
			
 
				+		*max_resp = M_RES(mp)->tr_attrsetm;	/* struct copy */
			
 
				+		max_resp->tr_logres = attr_space;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Calculate the minimum valid log size for the given superblock configuration.
			
 
				+ * Used to calculate the minimum log size at mkfs time, and to determine if
			
 
				+ * the log is large enough or not at mount time. Returns the minimum size in
			
 
				+ * filesystem block size units.
			
 
				+ */
			
 
				+int
			
 
				+xfs_log_calc_minimum_size(
			
 
				+	struct xfs_mount	*mp)
			
 
				+{
			
 
				+	struct xfs_trans_res	tres = {0};
			
 
				+	int			max_logres;
			
 
				+	int			min_logblks = 0;
			
 
				+	int			lsunit = 0;
			
 
				+
			
 
				+	xfs_log_get_max_trans_res(mp, &tres);
			
 
				+
			
 
				+	max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
			
 
				+	if (tres.tr_logcount > 1)
			
 
				+		max_logres *= tres.tr_logcount;
			
 
				+
			
 
				+	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
			
 
				+		lsunit = BTOBB(mp->m_sb.sb_logsunit);
			
 
				+
			
 
				+	/*
			
 
				+	 * Two factors should be taken into account for calculating the minimum
			
 
				+	 * log space.
			
 
				+	 * 1) The fundamental limitation is that no single transaction can be
			
 
				+	 *    larger than half the size of the log.
			
 
				+	 *
			
 
				+	 *    From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR
			
 
				+	 *    define, which is set to 3. That means we can definitely fit
			
 
				+	 *    2 maximally sized transactions in the log. We'll use this same
			
 
				+	 *    value here.
			
 
				+	 *
			
 
				+	 * 2) If the lsunit option is specified, a transaction requires 2 LSU
			
 
				+	 *    for the reservation because there are two log writes that can
			
 
				+	 *    require padding - the transaction data and the commit record which
			
 
				+	 *    are written separately and both can require padding to the LSU.
			
 
				+	 *    Consider that we can have an active CIL reservation holding 2*LSU,
			
 
				+	 *    but the CIL is not over a push threshold, in this case, if we
			
 
				+	 *    don't have enough log space for one new transaction, which
			
 
				+	 *    includes another 2*LSU in the reservation, we will run into a dead
			
 
				+	 *    loop situation in log space grant procedure. i.e.
			
 
				+	 *    xlog_grant_head_wait().
			
 
				+	 *
			
 
				+	 *    Hence the log size needs to be able to contain two maximally sized
			
 
				+	 *    and padded transactions, which is (2 * (2 * LSU + maxlres)).
			
 
				+	 *
			
 
				+	 * Also, the log size should be a multiple of the log stripe unit, round
			
 
				+	 * it up to lsunit boundary if lsunit is specified.
			
 
				+	 */
			
 
				+	if (lsunit) {
			
 
				+		min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
			
 
				+			      2 * lsunit;
			
 
				+	} else
			
 
				+		min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
			
 
				+	min_logblks *= XFS_MIN_LOG_FACTOR;
			
 
				+
			
 
				+	return XFS_BB_TO_FSB(mp, min_logblks);
			
 
				+}
			
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -17,7 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_inum.h"
			
@@ -25,8 +25,10 @@
 
				 #include "xfs_trans_priv.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
@@ -40,7 +42,6 @@
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_fsops.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				 #include "xfs_cksum.h"
			
@@ -59,69 +60,6 @@ STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 
				 #define xfs_icsb_balance_counter_locked(mp, a, b)	do { } while (0)
			
 
				 #endif
			
 
				 
			
 
				-static const struct {
			
 
				-	short offset;
			
 
				-	short type;	/* 0 = integer
			
 
				-			 * 1 = binary / string (no translation)
			
 
				-			 */
			
 
				-} xfs_sb_info[] = {
			
 
				-    { offsetof(xfs_sb_t, sb_magicnum),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_blocksize),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_dblocks),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rblocks),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rextents),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_uuid),       1 },
			
 
				-    { offsetof(xfs_sb_t, sb_logstart),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rootino),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rbmino),     0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rsumino),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rextsize),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_agblocks),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_agcount),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_logblocks),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_versionnum), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_sectsize),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inodesize),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inopblock),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
			
 
				-    { offsetof(xfs_sb_t, sb_blocklog),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_sectlog),    0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inodelog),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inopblog),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_agblklog),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_rextslog),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inprogress), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_icount),     0 },
			
 
				-    { offsetof(xfs_sb_t, sb_ifree),      0 },
			
 
				-    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_frextents),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_uquotino),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_gquotino),   0 },
			
 
				-    { offsetof(xfs_sb_t, sb_qflags),     0 },
			
 
				-    { offsetof(xfs_sb_t, sb_flags),      0 },
			
 
				-    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
			
 
				-    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_unit),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_width),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_dirblklog),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_logsectlog), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_logsectsize),0 },
			
 
				-    { offsetof(xfs_sb_t, sb_logsunit),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_features2),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_bad_features2), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_features_compat), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_features_incompat), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_crc),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_pad),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_pquotino),	 0 },
			
 
				-    { offsetof(xfs_sb_t, sb_lsn),	 0 },
			
 
				-    { sizeof(xfs_sb_t),			 0 }
			
 
				-};
			
 
				-
			
 
				 static DEFINE_MUTEX(xfs_uuid_table_mutex);
			
 
				 static int xfs_uuid_table_size;
			
 
				 static uuid_t *xfs_uuid_table;
			
@@ -197,64 +135,6 @@ xfs_uuid_unmount(
 
				 }
			
 
				 
			
 
				 
			
 
				-/*
			
 
				- * Reference counting access wrappers to the perag structures.
			
 
				- * Because we never free per-ag structures, the only thing we
			
 
				- * have to protect against changes is the tree structure itself.
			
 
				- */
			
 
				-struct xfs_perag *
			
 
				-xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
			
 
				-{
			
 
				-	struct xfs_perag	*pag;
			
 
				-	int			ref = 0;
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
			
 
				-	if (pag) {
			
 
				-		ASSERT(atomic_read(&pag->pag_ref) >= 0);
			
 
				-		ref = atomic_inc_return(&pag->pag_ref);
			
 
				-	}
			
 
				-	rcu_read_unlock();
			
 
				-	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
			
 
				-	return pag;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * search from @first to find the next perag with the given tag set.
			
 
				- */
			
 
				-struct xfs_perag *
			
 
				-xfs_perag_get_tag(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	xfs_agnumber_t		first,
			
 
				-	int			tag)
			
 
				-{
			
 
				-	struct xfs_perag	*pag;
			
 
				-	int			found;
			
 
				-	int			ref;
			
 
				-
			
 
				-	rcu_read_lock();
			
 
				-	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
			
 
				-					(void **)&pag, first, 1, tag);
			
 
				-	if (found <= 0) {
			
 
				-		rcu_read_unlock();
			
 
				-		return NULL;
			
 
				-	}
			
 
				-	ref = atomic_inc_return(&pag->pag_ref);
			
 
				-	rcu_read_unlock();
			
 
				-	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
			
 
				-	return pag;
			
 
				-}
			
 
				-
			
 
				-void
			
 
				-xfs_perag_put(struct xfs_perag *pag)
			
 
				-{
			
 
				-	int	ref;
			
 
				-
			
 
				-	ASSERT(atomic_read(&pag->pag_ref) > 0);
			
 
				-	ref = atomic_dec_return(&pag->pag_ref);
			
 
				-	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
			
 
				-}
			
 
				-
			
 
				 STATIC void
			
 
				 __xfs_free_perag(
			
 
				 	struct rcu_head	*head)
			
@@ -307,184 +187,6 @@ xfs_sb_validate_fsb_count(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Check the validity of the SB found.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_mount_validate_sb(
			
 
				-	xfs_mount_t	*mp,
			
 
				-	xfs_sb_t	*sbp,
			
 
				-	bool		check_inprogress,
			
 
				-	bool		check_version)
			
 
				-{
			
 
				-
			
 
				-	/*
			
 
				-	 * If the log device and data device have the
			
 
				-	 * same device number, the log is internal.
			
 
				-	 * Consequently, the sb_logstart should be non-zero.  If
			
 
				-	 * we have a zero sb_logstart in this case, we may be trying to mount
			
 
				-	 * a volume filesystem in a non-volume manner.
			
 
				-	 */
			
 
				-	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
			
 
				-		xfs_warn(mp, "bad magic number");
			
 
				-		return XFS_ERROR(EWRONGFS);
			
 
				-	}
			
 
				-
			
 
				-
			
 
				-	if (!xfs_sb_good_version(sbp)) {
			
 
				-		xfs_warn(mp, "bad version");
			
 
				-		return XFS_ERROR(EWRONGFS);
			
 
				-	}
			
 
				-
			
 
				-	if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) &&
			
 
				-			(sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
			
 
				-				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) {
			
 
				-		xfs_notice(mp,
			
 
				-"Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n");
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Version 5 superblock feature mask validation. Reject combinations the
			
 
				-	 * kernel cannot support up front before checking anything else. For
			
 
				-	 * write validation, we don't need to check feature masks.
			
 
				-	 */
			
 
				-	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
			
 
				-		xfs_alert(mp,
			
 
				-"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
			
 
				-"Use of these features in this kernel is at your own risk!");
			
 
				-
			
 
				-		if (xfs_sb_has_compat_feature(sbp,
			
 
				-					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
			
 
				-			xfs_warn(mp,
			
 
				-"Superblock has unknown compatible features (0x%x) enabled.\n"
			
 
				-"Using a more recent kernel is recommended.",
			
 
				-				(sbp->sb_features_compat &
			
 
				-						XFS_SB_FEAT_COMPAT_UNKNOWN));
			
 
				-		}
			
 
				-
			
 
				-		if (xfs_sb_has_ro_compat_feature(sbp,
			
 
				-					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			
 
				-			xfs_alert(mp,
			
 
				-"Superblock has unknown read-only compatible features (0x%x) enabled.",
			
 
				-				(sbp->sb_features_ro_compat &
			
 
				-						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			
 
				-			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
			
 
				-				xfs_warn(mp,
			
 
				-"Attempted to mount read-only compatible filesystem read-write.\n"
			
 
				-"Filesystem can only be safely mounted read only.");
			
 
				-				return XFS_ERROR(EINVAL);
			
 
				-			}
			
 
				-		}
			
 
				-		if (xfs_sb_has_incompat_feature(sbp,
			
 
				-					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
			
 
				-			xfs_warn(mp,
			
 
				-"Superblock has unknown incompatible features (0x%x) enabled.\n"
			
 
				-"Filesystem can not be safely mounted by this kernel.",
			
 
				-				(sbp->sb_features_incompat &
			
 
				-						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
			
 
				-			return XFS_ERROR(EINVAL);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	if (unlikely(
			
 
				-	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
			
 
				-		xfs_warn(mp,
			
 
				-		"filesystem is marked as having an external log; "
			
 
				-		"specify logdev on the mount command line.");
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-	}
			
 
				-
			
 
				-	if (unlikely(
			
 
				-	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
			
 
				-		xfs_warn(mp,
			
 
				-		"filesystem is marked as having an internal log; "
			
 
				-		"do not specify logdev on the mount command line.");
			
 
				-		return XFS_ERROR(EINVAL);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * More sanity checking.  Most of these were stolen directly from
			
 
				-	 * xfs_repair.
			
 
				-	 */
			
 
				-	if (unlikely(
			
 
				-	    sbp->sb_agcount <= 0					||
			
 
				-	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
			
 
				-	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
			
 
				-	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
			
 
				-	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
			
 
				-	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
			
 
				-	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
			
 
				-	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
			
 
				-	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
			
 
				-	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
			
 
				-	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
			
 
				-	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
			
 
				-	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
			
 
				-	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
			
 
				-	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
			
 
				-	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
			
 
				-	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
			
 
				-	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
			
 
				-	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
			
 
				-	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
			
 
				-	    sbp->sb_dblocks == 0					||
			
 
				-	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
			
 
				-	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
			
 
				-		XFS_CORRUPTION_ERROR("SB sanity check failed",
			
 
				-				XFS_ERRLEVEL_LOW, mp, sbp);
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Until this is fixed only page-sized or smaller data blocks work.
			
 
				-	 */
			
 
				-	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
			
 
				-		xfs_warn(mp,
			
 
				-		"File system with blocksize %d bytes. "
			
 
				-		"Only pagesize (%ld) or less will currently work.",
			
 
				-				sbp->sb_blocksize, PAGE_SIZE);
			
 
				-		return XFS_ERROR(ENOSYS);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Currently only very few inode sizes are supported.
			
 
				-	 */
			
 
				-	switch (sbp->sb_inodesize) {
			
 
				-	case 256:
			
 
				-	case 512:
			
 
				-	case 1024:
			
 
				-	case 2048:
			
 
				-		break;
			
 
				-	default:
			
 
				-		xfs_warn(mp, "inode size of %d bytes not supported",
			
 
				-				sbp->sb_inodesize);
			
 
				-		return XFS_ERROR(ENOSYS);
			
 
				-	}
			
 
				-
			
 
				-	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
			
 
				-	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
			
 
				-		xfs_warn(mp,
			
 
				-		"file system too large to be mounted on this system.");
			
 
				-		return XFS_ERROR(EFBIG);
			
 
				-	}
			
 
				-
			
 
				-	if (check_inprogress && sbp->sb_inprogress) {
			
 
				-		xfs_warn(mp, "Offline file system operation in progress!");
			
 
				-		return XFS_ERROR(EFSCORRUPTED);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Version 1 directory format has never worked on Linux.
			
 
				-	 */
			
 
				-	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
			
 
				-		xfs_warn(mp, "file system using version 1 directory format");
			
 
				-		return XFS_ERROR(ENOSYS);
			
 
				-	}
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 int
			
 
				 xfs_initialize_perag(
			
 
				 	xfs_mount_t	*mp,
			
@@ -569,283 +271,15 @@ out_unwind:
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-xfs_sb_quota_from_disk(struct xfs_sb *sbp)
			
 
				-{
			
 
				-	if (sbp->sb_qflags & XFS_OQUOTA_ENFD)
			
 
				-		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
			
 
				-					XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD;
			
 
				-	if (sbp->sb_qflags & XFS_OQUOTA_CHKD)
			
 
				-		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
			
 
				-					XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
			
 
				-	sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
			
 
				-}
			
 
				-
			
 
				-void
			
 
				-xfs_sb_from_disk(
			
 
				-	struct xfs_sb	*to,
			
 
				-	xfs_dsb_t	*from)
			
 
				-{
			
 
				-	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
			
 
				-	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
			
 
				-	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
			
 
				-	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
			
 
				-	to->sb_rextents = be64_to_cpu(from->sb_rextents);
			
 
				-	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
			
 
				-	to->sb_logstart = be64_to_cpu(from->sb_logstart);
			
 
				-	to->sb_rootino = be64_to_cpu(from->sb_rootino);
			
 
				-	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
			
 
				-	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
			
 
				-	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
			
 
				-	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
			
 
				-	to->sb_agcount = be32_to_cpu(from->sb_agcount);
			
 
				-	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
			
 
				-	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
			
 
				-	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
			
 
				-	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
			
 
				-	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
			
 
				-	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
			
 
				-	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
			
 
				-	to->sb_blocklog = from->sb_blocklog;
			
 
				-	to->sb_sectlog = from->sb_sectlog;
			
 
				-	to->sb_inodelog = from->sb_inodelog;
			
 
				-	to->sb_inopblog = from->sb_inopblog;
			
 
				-	to->sb_agblklog = from->sb_agblklog;
			
 
				-	to->sb_rextslog = from->sb_rextslog;
			
 
				-	to->sb_inprogress = from->sb_inprogress;
			
 
				-	to->sb_imax_pct = from->sb_imax_pct;
			
 
				-	to->sb_icount = be64_to_cpu(from->sb_icount);
			
 
				-	to->sb_ifree = be64_to_cpu(from->sb_ifree);
			
 
				-	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
			
 
				-	to->sb_frextents = be64_to_cpu(from->sb_frextents);
			
 
				-	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
			
 
				-	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
			
 
				-	to->sb_qflags = be16_to_cpu(from->sb_qflags);
			
 
				-	to->sb_flags = from->sb_flags;
			
 
				-	to->sb_shared_vn = from->sb_shared_vn;
			
 
				-	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
			
 
				-	to->sb_unit = be32_to_cpu(from->sb_unit);
			
 
				-	to->sb_width = be32_to_cpu(from->sb_width);
			
 
				-	to->sb_dirblklog = from->sb_dirblklog;
			
 
				-	to->sb_logsectlog = from->sb_logsectlog;
			
 
				-	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
			
 
				-	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
			
 
				-	to->sb_features2 = be32_to_cpu(from->sb_features2);
			
 
				-	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
			
 
				-	to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
			
 
				-	to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
			
 
				-	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
			
 
				-	to->sb_features_log_incompat =
			
 
				-				be32_to_cpu(from->sb_features_log_incompat);
			
 
				-	to->sb_pad = 0;
			
 
				-	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
			
 
				-	to->sb_lsn = be64_to_cpu(from->sb_lsn);
			
 
				-}
			
 
				-
			
 
				-static inline void
			
 
				-xfs_sb_quota_to_disk(
			
 
				-	xfs_dsb_t	*to,
			
 
				-	xfs_sb_t	*from,
			
 
				-	__int64_t	*fields)
			
 
				-{
			
 
				-	__uint16_t	qflags = from->sb_qflags;
			
 
				-
			
 
				-	if (*fields & XFS_SB_QFLAGS) {
			
 
				-		/*
			
 
				-		 * The in-core version of sb_qflags do not have
			
 
				-		 * XFS_OQUOTA_* flags, whereas the on-disk version
			
 
				-		 * does.  So, convert incore XFS_{PG}QUOTA_* flags
			
 
				-		 * to on-disk XFS_OQUOTA_* flags.
			
 
				-		 */
			
 
				-		qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
			
 
				-				XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
			
 
				-
			
 
				-		if (from->sb_qflags &
			
 
				-				(XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
			
 
				-			qflags |= XFS_OQUOTA_ENFD;
			
 
				-		if (from->sb_qflags &
			
 
				-				(XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
			
 
				-			qflags |= XFS_OQUOTA_CHKD;
			
 
				-		to->sb_qflags = cpu_to_be16(qflags);
			
 
				-		*fields &= ~XFS_SB_QFLAGS;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Copy in core superblock to ondisk one.
			
 
				- *
			
 
				- * The fields argument is mask of superblock fields to copy.
			
 
				- */
			
 
				-void
			
 
				-xfs_sb_to_disk(
			
 
				-	xfs_dsb_t	*to,
			
 
				-	xfs_sb_t	*from,
			
 
				-	__int64_t	fields)
			
 
				-{
			
 
				-	xfs_caddr_t	to_ptr = (xfs_caddr_t)to;
			
 
				-	xfs_caddr_t	from_ptr = (xfs_caddr_t)from;
			
 
				-	xfs_sb_field_t	f;
			
 
				-	int		first;
			
 
				-	int		size;
			
 
				-
			
 
				-	ASSERT(fields);
			
 
				-	if (!fields)
			
 
				-		return;
			
 
				-
			
 
				-	xfs_sb_quota_to_disk(to, from, &fields);
			
 
				-	while (fields) {
			
 
				-		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
			
 
				-		first = xfs_sb_info[f].offset;
			
 
				-		size = xfs_sb_info[f + 1].offset - first;
			
 
				-
			
 
				-		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
			
 
				-
			
 
				-		if (size == 1 || xfs_sb_info[f].type == 1) {
			
 
				-			memcpy(to_ptr + first, from_ptr + first, size);
			
 
				-		} else {
			
 
				-			switch (size) {
			
 
				-			case 2:
			
 
				-				*(__be16 *)(to_ptr + first) =
			
 
				-					cpu_to_be16(*(__u16 *)(from_ptr + first));
			
 
				-				break;
			
 
				-			case 4:
			
 
				-				*(__be32 *)(to_ptr + first) =
			
 
				-					cpu_to_be32(*(__u32 *)(from_ptr + first));
			
 
				-				break;
			
 
				-			case 8:
			
 
				-				*(__be64 *)(to_ptr + first) =
			
 
				-					cpu_to_be64(*(__u64 *)(from_ptr + first));
			
 
				-				break;
			
 
				-			default:
			
 
				-				ASSERT(0);
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		fields &= ~(1LL << f);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static int
			
 
				-xfs_sb_verify(
			
 
				-	struct xfs_buf	*bp,
			
 
				-	bool		check_version)
			
 
				-{
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_sb	sb;
			
 
				-
			
 
				-	xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
			
 
				-
			
 
				-	/*
			
 
				-	 * Only check the in progress field for the primary superblock as
			
 
				-	 * mkfs.xfs doesn't clear it from secondary superblocks.
			
 
				-	 */
			
 
				-	return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
			
 
				-				     check_version);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * If the superblock has the CRC feature bit set or the CRC field is non-null,
			
 
				- * check that the CRC is valid.  We check the CRC field is non-null because a
			
 
				- * single bit error could clear the feature bit and unused parts of the
			
 
				- * superblock are supposed to be zero. Hence a non-null crc field indicates that
			
 
				- * we've potentially lost a feature bit and we should check it anyway.
			
 
				- */
			
 
				-static void
			
 
				-xfs_sb_read_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
			
 
				-	int		error;
			
 
				-
			
 
				-	/*
			
 
				-	 * open code the version check to avoid needing to convert the entire
			
 
				-	 * superblock from disk order just to check the version number
			
 
				-	 */
			
 
				-	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
			
 
				-	    (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
			
 
				-						XFS_SB_VERSION_5) ||
			
 
				-	     dsb->sb_crc != 0)) {
			
 
				-
			
 
				-		if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
			
 
				-				      offsetof(struct xfs_sb, sb_crc))) {
			
 
				-			error = EFSCORRUPTED;
			
 
				-			goto out_error;
			
 
				-		}
			
 
				-	}
			
 
				-	error = xfs_sb_verify(bp, true);
			
 
				-
			
 
				-out_error:
			
 
				-	if (error) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				-		xfs_buf_ioerror(bp, error);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * We may be probed for a filesystem match, so we may not want to emit
			
 
				- * messages when the superblock buffer is not actually an XFS superblock.
			
 
				- * If we find an XFS superblock, the run a normal, noisy mount because we are
			
 
				- * really going to mount it and want to know about errors.
			
 
				- */
			
 
				-static void
			
 
				-xfs_sb_quiet_read_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
			
 
				-
			
 
				-
			
 
				-	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
			
 
				-		/* XFS filesystem, verify noisily! */
			
 
				-		xfs_sb_read_verify(bp);
			
 
				-		return;
			
 
				-	}
			
 
				-	/* quietly fail */
			
 
				-	xfs_buf_ioerror(bp, EWRONGFS);
			
 
				-}
			
 
				-
			
 
				-static void
			
 
				-xfs_sb_write_verify(
			
 
				-	struct xfs_buf		*bp)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				-	int			error;
			
 
				-
			
 
				-	error = xfs_sb_verify(bp, false);
			
 
				-	if (error) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				-		xfs_buf_ioerror(bp, error);
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return;
			
 
				-
			
 
				-	if (bip)
			
 
				-		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				-
			
 
				-	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				-			 offsetof(struct xfs_sb, sb_crc));
			
 
				-}
			
 
				-
			
 
				-const struct xfs_buf_ops xfs_sb_buf_ops = {
			
 
				-	.verify_read = xfs_sb_read_verify,
			
 
				-	.verify_write = xfs_sb_write_verify,
			
 
				-};
			
 
				-
			
 
				-static const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
			
 
				-	.verify_read = xfs_sb_quiet_read_verify,
			
 
				-	.verify_write = xfs_sb_write_verify,
			
 
				-};
			
 
				-
			
 
				 /*
			
 
				  * xfs_readsb
			
 
				  *
			
 
				  * Does the initial read of the superblock.
			
 
				  */
			
 
				 int
			
 
				-xfs_readsb(xfs_mount_t *mp, int flags)
			
 
				+xfs_readsb(
			
 
				+	struct xfs_mount *mp,
			
 
				+	int		flags)
			
 
				 {
			
 
				 	unsigned int	sector_size;
			
 
				 	struct xfs_buf	*bp;
			
@@ -884,8 +318,8 @@ reread:
 
				 	 * Initialize the mount structure from the superblock.
			
 
				 	 */
			
 
				 	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
			
 
				-
			
 
				 	xfs_sb_quota_from_disk(&mp->m_sb);
			
 
				+
			
 
				 	/*
			
 
				 	 * We must be able to do sector-sized and sector-aligned IO.
			
 
				 	 */
			
@@ -922,107 +356,6 @@ release_buf:
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-
			
 
				-/*
			
 
				- * xfs_mount_common
			
 
				- *
			
 
				- * Mount initialization code establishing various mount
			
 
				- * fields from the superblock associated with the given
			
 
				- * mount structure
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
			
 
				-{
			
 
				-	mp->m_agfrotor = mp->m_agirotor = 0;
			
 
				-	spin_lock_init(&mp->m_agirotor_lock);
			
 
				-	mp->m_maxagi = mp->m_sb.sb_agcount;
			
 
				-	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
			
 
				-	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
			
 
				-	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
			
 
				-	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
			
 
				-	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
			
 
				-	mp->m_blockmask = sbp->sb_blocksize - 1;
			
 
				-	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
			
 
				-	mp->m_blockwmask = mp->m_blockwsize - 1;
			
 
				-
			
 
				-	mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
			
 
				-	mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
			
 
				-	mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
			
 
				-	mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
			
 
				-
			
 
				-	mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
			
 
				-	mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
			
 
				-	mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
			
 
				-	mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
			
 
				-
			
 
				-	mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
			
 
				-	mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
			
 
				-	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
			
 
				-	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
			
 
				-
			
 
				-	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
			
 
				-	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
			
 
				-					sbp->sb_inopblock);
			
 
				-	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * xfs_initialize_perag_data
			
 
				- *
			
 
				- * Read in each per-ag structure so we can count up the number of
			
 
				- * allocated inodes, free inodes and used filesystem blocks as this
			
 
				- * information is no longer persistent in the superblock. Once we have
			
 
				- * this information, write it into the in-core superblock structure.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
			
 
				-{
			
 
				-	xfs_agnumber_t	index;
			
 
				-	xfs_perag_t	*pag;
			
 
				-	xfs_sb_t	*sbp = &mp->m_sb;
			
 
				-	uint64_t	ifree = 0;
			
 
				-	uint64_t	ialloc = 0;
			
 
				-	uint64_t	bfree = 0;
			
 
				-	uint64_t	bfreelst = 0;
			
 
				-	uint64_t	btree = 0;
			
 
				-	int		error;
			
 
				-
			
 
				-	for (index = 0; index < agcount; index++) {
			
 
				-		/*
			
 
				-		 * read the agf, then the agi. This gets us
			
 
				-		 * all the information we need and populates the
			
 
				-		 * per-ag structures for us.
			
 
				-		 */
			
 
				-		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-
			
 
				-		error = xfs_ialloc_pagi_init(mp, NULL, index);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-		pag = xfs_perag_get(mp, index);
			
 
				-		ifree += pag->pagi_freecount;
			
 
				-		ialloc += pag->pagi_count;
			
 
				-		bfree += pag->pagf_freeblks;
			
 
				-		bfreelst += pag->pagf_flcount;
			
 
				-		btree += pag->pagf_btreeblks;
			
 
				-		xfs_perag_put(pag);
			
 
				-	}
			
 
				-	/*
			
 
				-	 * Overwrite incore superblock counters with just-read data
			
 
				-	 */
			
 
				-	spin_lock(&mp->m_sb_lock);
			
 
				-	sbp->sb_ifree = ifree;
			
 
				-	sbp->sb_icount = ialloc;
			
 
				-	sbp->sb_fdblocks = bfree + bfreelst + btree;
			
 
				-	spin_unlock(&mp->m_sb_lock);
			
 
				-
			
 
				-	/* Fixup the per-cpu counters as well. */
			
 
				-	xfs_icsb_reinit_counters(mp);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Update alignment values based on mount options and sb values
			
 
				  */
			
@@ -1194,7 +527,7 @@ xfs_set_inoalignment(xfs_mount_t *mp)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Check that the data (and log if separate) are an ok size.
			
 
				+ * Check that the data (and log if separate) is an ok size.
			
 
				  */
			
 
				 STATIC int
			
 
				 xfs_check_sizes(xfs_mount_t *mp)
			
@@ -1264,8 +597,7 @@ xfs_mount_reset_sbqflags(
 
				 		return 0;
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
			
 
				-				  0, 0, XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		xfs_alert(mp, "%s: Superblock update failed!", __func__);
			
@@ -1315,7 +647,7 @@ xfs_mountfs(
 
				 	uint		quotaflags = 0;
			
 
				 	int		error = 0;
			
 
				 
			
 
				-	xfs_mount_common(mp, sbp);
			
 
				+	xfs_sb_mount_common(mp, sbp);
			
 
				 
			
 
				 	/*
			
 
				 	 * Check for a mismatched features2 values.  Older kernels
			
@@ -1400,7 +732,7 @@ xfs_mountfs(
 
				 	xfs_set_inoalignment(mp);
			
 
				 
			
 
				 	/*
			
 
				-	 * Check that the data (and log if separate) are an ok size.
			
 
				+	 * Check that the data (and log if separate) is an ok size.
			
 
				 	 */
			
 
				 	error = xfs_check_sizes(mp);
			
 
				 	if (error)
			
@@ -1738,8 +1070,7 @@ xfs_log_sbcount(xfs_mount_t *mp)
 
				 		return 0;
			
 
				 
			
 
				 	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
			
 
				-				  XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
@@ -1752,49 +1083,7 @@ xfs_log_sbcount(xfs_mount_t *mp)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * xfs_mod_sb() can be used to copy arbitrary changes to the
			
 
				- * in-core superblock into the superblock buffer to be logged.
			
 
				- * It does not provide the higher level of locking that is
			
 
				- * needed to protect the in-core superblock from concurrent
			
 
				- * access.
			
 
				- */
			
 
				-void
			
 
				-xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
			
 
				-{
			
 
				-	xfs_buf_t	*bp;
			
 
				-	int		first;
			
 
				-	int		last;
			
 
				-	xfs_mount_t	*mp;
			
 
				-	xfs_sb_field_t	f;
			
 
				-
			
 
				-	ASSERT(fields);
			
 
				-	if (!fields)
			
 
				-		return;
			
 
				-	mp = tp->t_mountp;
			
 
				-	bp = xfs_trans_getsb(tp, mp, 0);
			
 
				-	first = sizeof(xfs_sb_t);
			
 
				-	last = 0;
			
 
				-
			
 
				-	/* translate/copy */
			
 
				-
			
 
				-	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
			
 
				-
			
 
				-	/* find modified range */
			
 
				-	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
			
 
				-	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
			
 
				-	last = xfs_sb_info[f + 1].offset - 1;
			
 
				-
			
 
				-	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
			
 
				-	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
			
 
				-	first = xfs_sb_info[f].offset;
			
 
				-
			
 
				-	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
			
 
				-	xfs_trans_log_buf(tp, bp, first, last);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
			
 
				+ * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
			
 
				  * a delta to a specified field in the in-core superblock.  Simply
			
 
				  * switch on the field indicated and apply the delta to that field.
			
 
				  * Fields are not allowed to dip below zero, so if the delta would
			
@@ -2101,8 +1390,7 @@ xfs_mount_log_sb(
 
				 			 XFS_SB_VERSIONNUM));
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
			
 
				-				  XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
@@ -2260,12 +1548,6 @@ xfs_icsb_init_counters(
 
				 	if (mp->m_sb_cnts == NULL)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-#ifdef CONFIG_HOTPLUG_CPU
			
 
				-	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
			
 
				-	mp->m_icsb_notifier.priority = 0;
			
 
				-	register_hotcpu_notifier(&mp->m_icsb_notifier);
			
 
				-#endif /* CONFIG_HOTPLUG_CPU */
			
 
				-
			
 
				 	for_each_online_cpu(i) {
			
 
				 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
			
 
				 		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
			
@@ -2278,6 +1560,13 @@ xfs_icsb_init_counters(
 
				 	 * initial balance kicks us off correctly
			
 
				 	 */
			
 
				 	mp->m_icsb_counters = -1;
			
 
				+
			
 
				+#ifdef CONFIG_HOTPLUG_CPU
			
 
				+	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
			
 
				+	mp->m_icsb_notifier.priority = 0;
			
 
				+	register_hotcpu_notifier(&mp->m_icsb_notifier);
			
 
				+#endif /* CONFIG_HOTPLUG_CPU */
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,45 +18,7 @@
 
				 #ifndef __XFS_MOUNT_H__
			
 
				 #define	__XFS_MOUNT_H__
			
 
				 
			
 
				-typedef struct xfs_trans_reservations {
			
 
				-	uint	tr_write;	/* extent alloc trans */
			
 
				-	uint	tr_itruncate;	/* truncate trans */
			
 
				-	uint	tr_rename;	/* rename trans */
			
 
				-	uint	tr_link;	/* link trans */
			
 
				-	uint	tr_remove;	/* unlink trans */
			
 
				-	uint	tr_symlink;	/* symlink trans */
			
 
				-	uint	tr_create;	/* create trans */
			
 
				-	uint	tr_mkdir;	/* mkdir trans */
			
 
				-	uint	tr_ifree;	/* inode free trans */
			
 
				-	uint	tr_ichange;	/* inode update trans */
			
 
				-	uint	tr_growdata;	/* fs data section grow trans */
			
 
				-	uint	tr_swrite;	/* sync write inode trans */
			
 
				-	uint	tr_addafork;	/* cvt inode to attributed trans */
			
 
				-	uint	tr_writeid;	/* write setuid/setgid file */
			
 
				-	uint	tr_attrinval;	/* attr fork buffer invalidation */
			
 
				-	uint	tr_attrsetm;	/* set/create an attribute at mount time */
			
 
				-	uint	tr_attrsetrt;	/* set/create an attribute at runtime */
			
 
				-	uint	tr_attrrm;	/* remove an attribute */
			
 
				-	uint	tr_clearagi;	/* clear bad agi unlinked ino bucket */
			
 
				-	uint	tr_growrtalloc;	/* grow realtime allocations */
			
 
				-	uint	tr_growrtzero;	/* grow realtime zeroing */
			
 
				-	uint	tr_growrtfree;	/* grow realtime freeing */
			
 
				-	uint	tr_qm_sbchange;	/* change quota flags */
			
 
				-	uint	tr_qm_setqlim;	/* adjust quota limits */
			
 
				-	uint	tr_qm_dqalloc;	/* allocate quota on disk */
			
 
				-	uint	tr_qm_quotaoff;	/* turn quota off */
			
 
				-	uint	tr_qm_equotaoff;/* end of turn quota off */
			
 
				-	uint	tr_sb;		/* modify superblock */
			
 
				-} xfs_trans_reservations_t;
			
 
				-
			
 
				-#ifndef __KERNEL__
			
 
				-
			
 
				-#define xfs_daddr_to_agno(mp,d) \
			
 
				-	((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
			
 
				-#define xfs_daddr_to_agbno(mp,d) \
			
 
				-	((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
			
 
				-
			
 
				-#else /* __KERNEL__ */
			
 
				+#ifdef __KERNEL__
			
 
				 
			
 
				 struct xlog;
			
 
				 struct xfs_inode;
			
@@ -174,7 +136,7 @@ typedef struct xfs_mount {
 
				 	int			m_ialloc_blks;	/* blocks in inode allocation */
			
 
				 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
			
 
				 	uint			m_qflags;	/* quota status flags */
			
 
				-	xfs_trans_reservations_t m_reservations;/* precomputed res values */
			
 
				+	struct xfs_trans_resv	m_resv;		/* precomputed res values */
			
 
				 	__uint64_t		m_maxicount;	/* maximum inode count */
			
 
				 	__uint64_t		m_resblks;	/* total reserved blocks */
			
 
				 	__uint64_t		m_resblks_avail;/* available reserved blocks */
			
@@ -329,14 +291,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 
				 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * perag get/put wrappers for ref counting
			
 
				- */
			
 
				-struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
			
 
				-struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
			
 
				-					int tag);
			
 
				-void	xfs_perag_put(struct xfs_perag *pag);
			
 
				-
			
 
				 /*
			
 
				  * Per-cpu superblock locking functions
			
 
				  */
			
@@ -366,9 +320,63 @@ typedef struct xfs_mod_sb {
 
				 	int64_t		msb_delta;	/* Change to make to specified field */
			
 
				 } xfs_mod_sb_t;
			
 
				 
			
 
				+/*
			
 
				+ * Per-ag incore structure, copies of information in agf and agi, to improve the
			
 
				+ * performance of allocation group selection. This is defined for the kernel
			
 
				+ * only, and hence is defined here instead of in xfs_ag.h. You need the struct
			
 
				+ * xfs_mount to be defined to look up a xfs_perag anyway (via mp->m_perag_tree),
			
 
				+ * so this doesn't introduce any strange header file dependencies.
			
 
				+ */
			
 
				+typedef struct xfs_perag {
			
 
				+	struct xfs_mount *pag_mount;	/* owner filesystem */
			
 
				+	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
			
 
				+	atomic_t	pag_ref;	/* perag reference count */
			
 
				+	char		pagf_init;	/* this agf's entry is initialized */
			
 
				+	char		pagi_init;	/* this agi's entry is initialized */
			
 
				+	char		pagf_metadata;	/* the agf is preferred to be metadata */
			
 
				+	char		pagi_inodeok;	/* The agi is ok for inodes */
			
 
				+	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
			
 
				+					/* # of levels in bno & cnt btree */
			
 
				+	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
			
 
				+	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
			
 
				+	xfs_extlen_t	pagf_longest;	/* longest free space */
			
 
				+	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
			
 
				+	xfs_agino_t	pagi_freecount;	/* number of free inodes */
			
 
				+	xfs_agino_t	pagi_count;	/* number of allocated inodes */
			
 
				+
			
 
				+	/*
			
 
				+	 * Inode allocation search lookup optimisation.
			
 
				+	 * If the pagino matches, the search for new inodes
			
 
				+	 * doesn't need to search the near ones again straight away
			
 
				+	 */
			
 
				+	xfs_agino_t	pagl_pagino;
			
 
				+	xfs_agino_t	pagl_leftrec;
			
 
				+	xfs_agino_t	pagl_rightrec;
			
 
				+	spinlock_t	pagb_lock;	/* lock for pagb_tree */
			
 
				+	struct rb_root	pagb_tree;	/* ordered tree of busy extents */
			
 
				+
			
 
				+	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
			
 
				+
			
 
				+	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
			
 
				+	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
			
 
				+	int		pag_ici_reclaimable;	/* reclaimable inodes */
			
 
				+	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
			
 
				+	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
			
 
				+
			
 
				+	/* buffer cache index */
			
 
				+	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */
			
 
				+	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */
			
 
				+
			
 
				+	/* for rcu-safe freeing */
			
 
				+	struct rcu_head	rcu_head;
			
 
				+	int		pagb_count;	/* pagb slots in use */
			
 
				+} xfs_perag_t;
			
 
				+
			
 
				 extern int	xfs_log_sbcount(xfs_mount_t *);
			
 
				 extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
			
 
				 extern int	xfs_mountfs(xfs_mount_t *mp);
			
 
				+extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
			
 
				+				     xfs_agnumber_t *maxagi);
			
 
				 
			
 
				 extern void	xfs_unmountfs(xfs_mount_t *);
			
 
				 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
			
@@ -387,13 +395,4 @@ extern void	xfs_set_low_space_thresholds(struct xfs_mount *);
 
				 
			
 
				 #endif	/* __KERNEL__ */
			
 
				 
			
 
				-extern void	xfs_sb_calc_crc(struct xfs_buf	*);
			
 
				-extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
			
 
				-extern int	xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
			
 
				-					xfs_agnumber_t *);
			
 
				-extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
			
 
				-extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
			
 
				-
			
 
				-extern const struct xfs_buf_ops xfs_sb_buf_ops;
			
 
				-
			
 
				 #endif	/* __XFS_MOUNT_H__ */
			
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
@@ -37,7 +38,6 @@
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				 #include "xfs_trans_space.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_qm.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
@@ -834,21 +834,52 @@ xfs_qm_qino_alloc(
 
				 	int		error;
			
 
				 	int		committed;
			
 
				 
			
 
				+	*ip = NULL;
			
 
				+	/*
			
 
				+	 * With superblock that doesn't have separate pquotino, we
			
 
				+	 * share an inode between gquota and pquota. If the on-disk
			
 
				+	 * superblock has GQUOTA and the filesystem is now mounted
			
 
				+	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
			
 
				+	 * vice-versa.
			
 
				+	 */
			
 
				+	if (!xfs_sb_version_has_pquotino(&mp->m_sb) &&
			
 
				+			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
			
 
				+		xfs_ino_t ino = NULLFSINO;
			
 
				+
			
 
				+		if ((flags & XFS_QMOPT_PQUOTA) &&
			
 
				+			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
			
 
				+			ino = mp->m_sb.sb_gquotino;
			
 
				+			ASSERT(mp->m_sb.sb_pquotino == NULLFSINO);
			
 
				+		} else if ((flags & XFS_QMOPT_GQUOTA) &&
			
 
				+			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
			
 
				+			ino = mp->m_sb.sb_pquotino;
			
 
				+			ASSERT(mp->m_sb.sb_gquotino == NULLFSINO);
			
 
				+		}
			
 
				+		if (ino != NULLFSINO) {
			
 
				+			error = xfs_iget(mp, NULL, ino, 0, 0, ip);
			
 
				+			if (error)
			
 
				+				return error;
			
 
				+			mp->m_sb.sb_gquotino = NULLFSINO;
			
 
				+			mp->m_sb.sb_pquotino = NULLFSINO;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
			
 
				-	if ((error = xfs_trans_reserve(tp,
			
 
				-				      XFS_QM_QINOCREATE_SPACE_RES(mp),
			
 
				-				      XFS_CREATE_LOG_RES(mp), 0,
			
 
				-				      XFS_TRANS_PERM_LOG_RES,
			
 
				-				      XFS_CREATE_LOG_COUNT))) {
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
			
 
				+				  XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
			
 
				+	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
 
				 	}
			
 
				 
			
 
				-	error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
			
 
				-	if (error) {
			
 
				-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
			
 
				-				 XFS_TRANS_ABORT);
			
 
				-		return error;
			
 
				+	if (!*ip) {
			
 
				+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
			
 
				+								&committed);
			
 
				+		if (error) {
			
 
				+			xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
			
 
				+					 XFS_TRANS_ABORT);
			
 
				+			return error;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -860,21 +891,25 @@ xfs_qm_qino_alloc(
 
				 	if (flags & XFS_QMOPT_SBVERSION) {
			
 
				 		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
			
 
				 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			
 
				-				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
			
 
				-		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			
 
				-			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
			
 
				+			XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) ==
			
 
				+				(XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			
 
				+				 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
			
 
				+				 XFS_SB_QFLAGS));
			
 
				 
			
 
				 		xfs_sb_version_addquota(&mp->m_sb);
			
 
				 		mp->m_sb.sb_uquotino = NULLFSINO;
			
 
				 		mp->m_sb.sb_gquotino = NULLFSINO;
			
 
				+		mp->m_sb.sb_pquotino = NULLFSINO;
			
 
				 
			
 
				-		/* qflags will get updated _after_ quotacheck */
			
 
				-		mp->m_sb.sb_qflags = 0;
			
 
				+		/* qflags will get updated fully _after_ quotacheck */
			
 
				+		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
			
 
				 	}
			
 
				 	if (flags & XFS_QMOPT_UQUOTA)
			
 
				 		mp->m_sb.sb_uquotino = (*ip)->i_ino;
			
 
				-	else
			
 
				+	else if (flags & XFS_QMOPT_GQUOTA)
			
 
				 		mp->m_sb.sb_gquotino = (*ip)->i_ino;
			
 
				+	else
			
 
				+		mp->m_sb.sb_pquotino = (*ip)->i_ino;
			
 
				 	spin_unlock(&mp->m_sb_lock);
			
 
				 	xfs_mod_sb(tp, sbfields);
			
 
				 
			
@@ -1484,11 +1519,10 @@ xfs_qm_init_quotainos(
 
				 			if (error)
			
 
				 				goto error_rele;
			
 
				 		}
			
 
				-		/* XXX: Use gquotino for now */
			
 
				 		if (XFS_IS_PQUOTA_ON(mp) &&
			
 
				-		    mp->m_sb.sb_gquotino != NULLFSINO) {
			
 
				-			ASSERT(mp->m_sb.sb_gquotino > 0);
			
 
				-			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
			
 
				+		    mp->m_sb.sb_pquotino != NULLFSINO) {
			
 
				+			ASSERT(mp->m_sb.sb_pquotino > 0);
			
 
				+			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
			
 
				 					     0, 0, &pip);
			
 
				 			if (error)
			
 
				 				goto error_rele;
			
@@ -1496,7 +1530,8 @@ xfs_qm_init_quotainos(
 
				 	} else {
			
 
				 		flags |= XFS_QMOPT_SBVERSION;
			
 
				 		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
			
 
				-			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
			
 
				+			    XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
			
 
				+			    XFS_SB_QFLAGS);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1524,9 +1559,8 @@ xfs_qm_init_quotainos(
 
				 		flags &= ~XFS_QMOPT_SBVERSION;
			
 
				 	}
			
 
				 	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
			
 
				-		/* XXX: Use XFS_SB_GQUOTINO for now */
			
 
				 		error = xfs_qm_qino_alloc(mp, &pip,
			
 
				-					  sbflags | XFS_SB_GQUOTINO,
			
 
				+					  sbflags | XFS_SB_PQUOTINO,
			
 
				 					  flags | XFS_QMOPT_PQUOTA);
			
 
				 		if (error)
			
 
				 			goto error_rele;
			
@@ -1704,8 +1738,7 @@ xfs_qm_write_sb_changes(
 
				 	int		error;
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
			
 
				-				  0, 0, XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return error;
			
@@ -1734,8 +1767,8 @@ xfs_qm_write_sb_changes(
 
				 int
			
 
				 xfs_qm_vop_dqalloc(
			
 
				 	struct xfs_inode	*ip,
			
 
				-	uid_t			uid,
			
 
				-	gid_t			gid,
			
 
				+	xfs_dqid_t		uid,
			
 
				+	xfs_dqid_t		gid,
			
 
				 	prid_t			prid,
			
 
				 	uint			flags,
			
 
				 	struct xfs_dquot	**O_udqpp,
			
@@ -1782,7 +1815,7 @@ xfs_qm_vop_dqalloc(
 
				 			 * holding ilock.
			
 
				 			 */
			
 
				 			xfs_iunlock(ip, lockflags);
			
 
				-			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
			
 
				+			error = xfs_qm_dqget(mp, NULL, uid,
			
 
				 						 XFS_DQ_USER,
			
 
				 						 XFS_QMOPT_DQALLOC |
			
 
				 						 XFS_QMOPT_DOWARN,
			
@@ -1809,7 +1842,7 @@ xfs_qm_vop_dqalloc(
 
				 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
			
 
				 		if (ip->i_d.di_gid != gid) {
			
 
				 			xfs_iunlock(ip, lockflags);
			
 
				-			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
			
 
				+			error = xfs_qm_dqget(mp, NULL, gid,
			
 
				 						 XFS_DQ_GROUP,
			
 
				 						 XFS_QMOPT_DQALLOC |
			
 
				 						 XFS_QMOPT_DOWARN,
			
@@ -1943,7 +1976,7 @@ xfs_qm_vop_chown_reserve(
 
				 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
			
 
				 
			
 
				 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
			
 
				-	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
			
 
				+	    ip->i_d.di_uid != be32_to_cpu(udqp->q_core.d_id)) {
			
 
				 		udq_delblks = udqp;
			
 
				 		/*
			
 
				 		 * If there are delayed allocation blocks, then we have to
			
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -160,6 +160,8 @@ extern int		xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
 
				 					struct fs_disk_quota *);
			
 
				 extern int		xfs_qm_scall_getqstat(struct xfs_mount *,
			
 
				 					struct fs_quota_stat *);
			
 
				+extern int		xfs_qm_scall_getqstatv(struct xfs_mount *,
			
 
				+					struct fs_quota_statv *);
			
 
				 extern int		xfs_qm_scall_quotaon(struct xfs_mount *, uint);
			
 
				 extern int		xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
			
 
				 
			
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -17,6 +17,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -20,6 +20,7 @@
 
				 
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
@@ -37,7 +38,6 @@
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_qm.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
@@ -247,9 +247,7 @@ xfs_qm_scall_trunc_qfile(
 
				 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			
 
				-				  XFS_TRANS_PERM_LOG_RES,
			
 
				-				  XFS_ITRUNCATE_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			
@@ -296,8 +294,10 @@ xfs_qm_scall_trunc_qfiles(
 
				 
			
 
				 	if (flags & XFS_DQ_USER)
			
 
				 		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
			
 
				-	if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
			
 
				+	if (flags & XFS_DQ_GROUP)
			
 
				 		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
			
 
				+	if (flags & XFS_DQ_PROJ)
			
 
				+		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino);
			
 
				 
			
 
				 	return error ? error : error2;
			
 
				 }
			
@@ -404,6 +404,7 @@ xfs_qm_scall_quotaon(
 
				 
			
 
				 /*
			
 
				  * Return quota status information, such as uquota-off, enforcements, etc.
			
 
				+ * for Q_XGETQSTAT command.
			
 
				  */
			
 
				 int
			
 
				 xfs_qm_scall_getqstat(
			
@@ -413,8 +414,10 @@ xfs_qm_scall_getqstat(
 
				 	struct xfs_quotainfo	*q = mp->m_quotainfo;
			
 
				 	struct xfs_inode	*uip = NULL;
			
 
				 	struct xfs_inode	*gip = NULL;
			
 
				+	struct xfs_inode	*pip = NULL;
			
 
				 	bool                    tempuqip = false;
			
 
				 	bool                    tempgqip = false;
			
 
				+	bool                    temppqip = false;
			
 
				 
			
 
				 	memset(out, 0, sizeof(fs_quota_stat_t));
			
 
				 
			
@@ -424,16 +427,106 @@ xfs_qm_scall_getqstat(
 
				 		out->qs_gquota.qfs_ino = NULLFSINO;
			
 
				 		return (0);
			
 
				 	}
			
 
				+
			
 
				+	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
			
 
				+							(XFS_ALL_QUOTA_ACCT|
			
 
				+							 XFS_ALL_QUOTA_ENFD));
			
 
				+	if (q) {
			
 
				+		uip = q->qi_uquotaip;
			
 
				+		gip = q->qi_gquotaip;
			
 
				+		pip = q->qi_pquotaip;
			
 
				+	}
			
 
				+	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
			
 
				+		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
			
 
				+					0, 0, &uip) == 0)
			
 
				+			tempuqip = true;
			
 
				+	}
			
 
				+	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
			
 
				+		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
			
 
				+					0, 0, &gip) == 0)
			
 
				+			tempgqip = true;
			
 
				+	}
			
 
				+	/*
			
 
				+	 * Q_XGETQSTAT doesn't have room for both group and project quotas.
			
 
				+	 * So, allow the project quota values to be copied out only if
			
 
				+	 * there is no group quota information available.
			
 
				+	 */
			
 
				+	if (!gip) {
			
 
				+		if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
			
 
				+			if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
			
 
				+						0, 0, &pip) == 0)
			
 
				+				temppqip = true;
			
 
				+		}
			
 
				+	} else
			
 
				+		pip = NULL;
			
 
				+	if (uip) {
			
 
				+		out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
			
 
				+		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
			
 
				+		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
			
 
				+		if (tempuqip)
			
 
				+			IRELE(uip);
			
 
				+	}
			
 
				+
			
 
				+	if (gip) {
			
 
				+		out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
			
 
				+		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
			
 
				+		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
			
 
				+		if (tempgqip)
			
 
				+			IRELE(gip);
			
 
				+	}
			
 
				+	if (pip) {
			
 
				+		out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
			
 
				+		out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks;
			
 
				+		out->qs_gquota.qfs_nextents = pip->i_d.di_nextents;
			
 
				+		if (temppqip)
			
 
				+			IRELE(pip);
			
 
				+	}
			
 
				+	if (q) {
			
 
				+		out->qs_incoredqs = q->qi_dquots;
			
 
				+		out->qs_btimelimit = q->qi_btimelimit;
			
 
				+		out->qs_itimelimit = q->qi_itimelimit;
			
 
				+		out->qs_rtbtimelimit = q->qi_rtbtimelimit;
			
 
				+		out->qs_bwarnlimit = q->qi_bwarnlimit;
			
 
				+		out->qs_iwarnlimit = q->qi_iwarnlimit;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Return quota status information, such as uquota-off, enforcements, etc.
			
 
				+ * for Q_XGETQSTATV command, to support separate project quota field.
			
 
				+ */
			
 
				+int
			
 
				+xfs_qm_scall_getqstatv(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct fs_quota_statv	*out)
			
 
				+{
			
 
				+	struct xfs_quotainfo	*q = mp->m_quotainfo;
			
 
				+	struct xfs_inode	*uip = NULL;
			
 
				+	struct xfs_inode	*gip = NULL;
			
 
				+	struct xfs_inode	*pip = NULL;
			
 
				+	bool                    tempuqip = false;
			
 
				+	bool                    tempgqip = false;
			
 
				+	bool                    temppqip = false;
			
 
				+
			
 
				+	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
			
 
				+		out->qs_uquota.qfs_ino = NULLFSINO;
			
 
				+		out->qs_gquota.qfs_ino = NULLFSINO;
			
 
				+		out->qs_pquota.qfs_ino = NULLFSINO;
			
 
				+		return (0);
			
 
				+	}
			
 
				+
			
 
				 	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
			
 
				 							(XFS_ALL_QUOTA_ACCT|
			
 
				 							 XFS_ALL_QUOTA_ENFD));
			
 
				-	out->qs_pad = 0;
			
 
				 	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
			
 
				 	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
			
 
				+	out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
			
 
				 
			
 
				 	if (q) {
			
 
				 		uip = q->qi_uquotaip;
			
 
				 		gip = q->qi_gquotaip;
			
 
				+		pip = q->qi_pquotaip;
			
 
				 	}
			
 
				 	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
			
 
				 		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
			
@@ -445,18 +538,30 @@ xfs_qm_scall_getqstat(
 
				 					0, 0, &gip) == 0)
			
 
				 			tempgqip = true;
			
 
				 	}
			
 
				+	if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
			
 
				+		if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
			
 
				+					0, 0, &pip) == 0)
			
 
				+			temppqip = true;
			
 
				+	}
			
 
				 	if (uip) {
			
 
				 		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
			
 
				 		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
			
 
				 		if (tempuqip)
			
 
				 			IRELE(uip);
			
 
				 	}
			
 
				+
			
 
				 	if (gip) {
			
 
				 		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
			
 
				 		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
			
 
				 		if (tempgqip)
			
 
				 			IRELE(gip);
			
 
				 	}
			
 
				+	if (pip) {
			
 
				+		out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks;
			
 
				+		out->qs_pquota.qfs_nextents = pip->i_d.di_nextents;
			
 
				+		if (temppqip)
			
 
				+			IRELE(pip);
			
 
				+	}
			
 
				 	if (q) {
			
 
				 		out->qs_incoredqs = q->qi_dquots;
			
 
				 		out->qs_btimelimit = q->qi_btimelimit;
			
@@ -515,8 +620,7 @@ xfs_qm_scall_setqlim(
 
				 	xfs_dqunlock(dqp);
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
			
 
				-				  0, 0, XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		goto out_rele;
			
@@ -650,8 +754,7 @@ xfs_qm_log_quotaoff_end(
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
			
 
				 
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
			
 
				-				  0, 0, XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
			
 
				 	if (error) {
			
 
				 		xfs_trans_cancel(tp, 0);
			
 
				 		return (error);
			
@@ -684,8 +787,7 @@ xfs_qm_log_quotaoff(
 
				 	uint			oldsbqflag=0;
			
 
				 
			
 
				 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
			
 
				-	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
			
 
				-				  0, 0, XFS_DEFAULT_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
			
 
				 	if (error)
			
 
				 		goto error0;
			
 
				 
			
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -18,267 +18,14 @@
 
				 #ifndef __XFS_QUOTA_H__
			
 
				 #define __XFS_QUOTA_H__
			
 
				 
			
 
				-struct xfs_trans;
			
 
				-
			
 
				-/*
			
 
				- * The ondisk form of a dquot structure.
			
 
				- */
			
 
				-#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
			
 
				-#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
			
 
				-
			
 
				-/*
			
 
				- * uid_t and gid_t are hard-coded to 32 bits in the inode.
			
 
				- * Hence, an 'id' in a dquot is 32 bits..
			
 
				- */
			
 
				-typedef __uint32_t	xfs_dqid_t;
			
 
				-
			
 
				-/*
			
 
				- * Even though users may not have quota limits occupying all 64-bits,
			
 
				- * they may need 64-bit accounting. Hence, 64-bit quota-counters,
			
 
				- * and quota-limits. This is a waste in the common case, but hey ...
			
 
				- */
			
 
				-typedef __uint64_t	xfs_qcnt_t;
			
 
				-typedef __uint16_t	xfs_qwarncnt_t;
			
 
				-
			
 
				-/*
			
 
				- * This is the main portion of the on-disk representation of quota
			
 
				- * information for a user. This is the q_core of the xfs_dquot_t that
			
 
				- * is kept in kernel memory. We pad this with some more expansion room
			
 
				- * to construct the on disk structure.
			
 
				- */
			
 
				-typedef struct	xfs_disk_dquot {
			
 
				-	__be16		d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
			
 
				-	__u8		d_version;	/* dquot version */
			
 
				-	__u8		d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
			
 
				-	__be32		d_id;		/* user,project,group id */
			
 
				-	__be64		d_blk_hardlimit;/* absolute limit on disk blks */
			
 
				-	__be64		d_blk_softlimit;/* preferred limit on disk blks */
			
 
				-	__be64		d_ino_hardlimit;/* maximum # allocated inodes */
			
 
				-	__be64		d_ino_softlimit;/* preferred inode limit */
			
 
				-	__be64		d_bcount;	/* disk blocks owned by the user */
			
 
				-	__be64		d_icount;	/* inodes owned by the user */
			
 
				-	__be32		d_itimer;	/* zero if within inode limits if not,
			
 
				-					   this is when we refuse service */
			
 
				-	__be32		d_btimer;	/* similar to above; for disk blocks */
			
 
				-	__be16		d_iwarns;	/* warnings issued wrt num inodes */
			
 
				-	__be16		d_bwarns;	/* warnings issued wrt disk blocks */
			
 
				-	__be32		d_pad0;		/* 64 bit align */
			
 
				-	__be64		d_rtb_hardlimit;/* absolute limit on realtime blks */
			
 
				-	__be64		d_rtb_softlimit;/* preferred limit on RT disk blks */
			
 
				-	__be64		d_rtbcount;	/* realtime blocks owned */
			
 
				-	__be32		d_rtbtimer;	/* similar to above; for RT disk blocks */
			
 
				-	__be16		d_rtbwarns;	/* warnings issued wrt RT disk blocks */
			
 
				-	__be16		d_pad;
			
 
				-} xfs_disk_dquot_t;
			
 
				-
			
 
				-/*
			
 
				- * This is what goes on disk. This is separated from the xfs_disk_dquot because
			
 
				- * carrying the unnecessary padding would be a waste of memory.
			
 
				- */
			
 
				-typedef struct xfs_dqblk {
			
 
				-	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
			
 
				-	char		  dd_fill[4];	/* filling for posterity */
			
 
				-
			
 
				-	/*
			
 
				-	 * These two are only present on filesystems with the CRC bits set.
			
 
				-	 */
			
 
				-	__be32		  dd_crc;	/* checksum */
			
 
				-	__be64		  dd_lsn;	/* last modification in log */
			
 
				-	uuid_t		  dd_uuid;	/* location information */
			
 
				-} xfs_dqblk_t;
			
 
				-
			
 
				-#define XFS_DQUOT_CRC_OFF	offsetof(struct xfs_dqblk, dd_crc)
			
 
				-
			
 
				-/*
			
 
				- * flags for q_flags field in the dquot.
			
 
				- */
			
 
				-#define XFS_DQ_USER		0x0001		/* a user quota */
			
 
				-#define XFS_DQ_PROJ		0x0002		/* project quota */
			
 
				-#define XFS_DQ_GROUP		0x0004		/* a group quota */
			
 
				-#define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
			
 
				-#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */
			
 
				-
			
 
				-#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
			
 
				-
			
 
				-#define XFS_DQ_FLAGS \
			
 
				-	{ XFS_DQ_USER,		"USER" }, \
			
 
				-	{ XFS_DQ_PROJ,		"PROJ" }, \
			
 
				-	{ XFS_DQ_GROUP,		"GROUP" }, \
			
 
				-	{ XFS_DQ_DIRTY,		"DIRTY" }, \
			
 
				-	{ XFS_DQ_FREEING,	"FREEING" }
			
 
				-
			
 
				-/*
			
 
				- * We have the possibility of all three quota types being active at once, and
			
 
				- * hence free space modification requires modification of all three current
			
 
				- * dquots in a single transaction. For this case we need to have a reservation
			
 
				- * of at least 3 dquots.
			
 
				- *
			
 
				- * However, a chmod operation can change both UID and GID in a single
			
 
				- * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be
			
 
				- * modified. Hence for this case we need to reserve space for at least 4 dquots.
			
 
				- *
			
 
				- * And in the worst case, there's a rename operation that can be modifying up to
			
 
				- * 4 inodes with dquots attached to them. In reality, the only inodes that can
			
 
				- * have their dquots modified are the source and destination directory inodes
			
 
				- * due to directory name creation and removal. That can require space allocation
			
 
				- * and/or freeing on both directory inodes, and hence all three dquots on each
			
 
				- * inode can be modified. And if the directories are world writeable, all the
			
 
				- * dquots can be unique and so 6 dquots can be modified....
			
 
				- *
			
 
				- * And, of course, we also need to take into account the dquot log format item
			
 
				- * used to describe each dquot.
			
 
				- */
			
 
				-#define XFS_DQUOT_LOGRES(mp)	\
			
 
				-	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
			
 
				-
			
 
				-/*
			
 
				- * These are the structures used to lay out dquots and quotaoff
			
 
				- * records on the log. Quite similar to those of inodes.
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * log format struct for dquots.
			
 
				- * The first two fields must be the type and size fitting into
			
 
				- * 32 bits : log_recovery code assumes that.
			
 
				- */
			
 
				-typedef struct xfs_dq_logformat {
			
 
				-	__uint16_t		qlf_type;      /* dquot log item type */
			
 
				-	__uint16_t		qlf_size;      /* size of this item */
			
 
				-	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
			
 
				-	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
			
 
				-	__int32_t		qlf_len;       /* len of dquot buffer */
			
 
				-	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
			
 
				-} xfs_dq_logformat_t;
			
 
				-
			
 
				-/*
			
 
				- * log format struct for QUOTAOFF records.
			
 
				- * The first two fields must be the type and size fitting into
			
 
				- * 32 bits : log_recovery code assumes that.
			
 
				- * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
			
 
				- * to the first and ensures that the first logitem is taken out of the AIL
			
 
				- * only when the last one is securely committed.
			
 
				- */
			
 
				-typedef struct xfs_qoff_logformat {
			
 
				-	unsigned short		qf_type;	/* quotaoff log item type */
			
 
				-	unsigned short		qf_size;	/* size of this item */
			
 
				-	unsigned int		qf_flags;	/* USR and/or GRP */
			
 
				-	char			qf_pad[12];	/* padding for future */
			
 
				-} xfs_qoff_logformat_t;
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
			
 
				- */
			
 
				-#define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
			
 
				-#define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
			
 
				-#define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
			
 
				-#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
			
 
				-#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
			
 
				-#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
			
 
				-#define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
			
 
				-
			
 
				-/*
			
 
				- * Conversion to and from the combined OQUOTA flag (if necessary)
			
 
				- * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk()
			
 
				- */
			
 
				-#define XFS_GQUOTA_ENFD	0x0080  /* group quota limits enforced */
			
 
				-#define XFS_GQUOTA_CHKD	0x0100  /* quotacheck run on group quotas */
			
 
				-#define XFS_PQUOTA_ENFD	0x0200  /* project quota limits enforced */
			
 
				-#define XFS_PQUOTA_CHKD	0x0400  /* quotacheck run on project quotas */
			
 
				-
			
 
				-/*
			
 
				- * Quota Accounting/Enforcement flags
			
 
				- */
			
 
				-#define XFS_ALL_QUOTA_ACCT	\
			
 
				-		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
			
 
				-#define XFS_ALL_QUOTA_ENFD	\
			
 
				-		(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD)
			
 
				-#define XFS_ALL_QUOTA_CHKD	\
			
 
				-		(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)
			
 
				-
			
 
				-#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
			
 
				-#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
			
 
				-#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
			
 
				-#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
			
 
				-#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
			
 
				-#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
			
 
				-#define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)
			
 
				-
			
 
				-/*
			
 
				- * Incore only flags for quotaoff - these bits get cleared when quota(s)
			
 
				- * are in the process of getting turned off. These flags are in m_qflags but
			
 
				- * never in sb_qflags.
			
 
				- */
			
 
				-#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */
			
 
				-#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */
			
 
				-#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */
			
 
				-#define XFS_ALL_QUOTA_ACTIVE	\
			
 
				-	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
			
 
				+#include "xfs_quota_defs.h"
			
 
				 
			
 
				 /*
			
 
				- * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
			
 
				- * quota will be not be switched off as long as that inode lock is held.
			
 
				+ * Kernel only quota definitions and functions
			
 
				  */
			
 
				-#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
			
 
				-						   XFS_GQUOTA_ACTIVE | \
			
 
				-						   XFS_PQUOTA_ACTIVE))
			
 
				-#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
			
 
				-						   XFS_PQUOTA_ACTIVE))
			
 
				-#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
			
 
				-#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
			
 
				-#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
			
 
				 
			
 
				-/*
			
 
				- * Flags to tell various functions what to do. Not all of these are meaningful
			
 
				- * to a single function. None of these XFS_QMOPT_* flags are meant to have
			
 
				- * persistent values (ie. their values can and will change between versions)
			
 
				- */
			
 
				-#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
			
 
				-#define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
			
 
				-#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
			
 
				-#define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
			
 
				-#define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
			
 
				-#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
			
 
				-#define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
			
 
				-#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
			
 
				-#define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
			
 
				-
			
 
				-/*
			
 
				- * flags to xfs_trans_mod_dquot to indicate which field needs to be
			
 
				- * modified.
			
 
				- */
			
 
				-#define XFS_QMOPT_RES_REGBLKS	0x0010000
			
 
				-#define XFS_QMOPT_RES_RTBLKS	0x0020000
			
 
				-#define XFS_QMOPT_BCOUNT	0x0040000
			
 
				-#define XFS_QMOPT_ICOUNT	0x0080000
			
 
				-#define XFS_QMOPT_RTBCOUNT	0x0100000
			
 
				-#define XFS_QMOPT_DELBCOUNT	0x0200000
			
 
				-#define XFS_QMOPT_DELRTBCOUNT	0x0400000
			
 
				-#define XFS_QMOPT_RES_INOS	0x0800000
			
 
				-
			
 
				-/*
			
 
				- * flags for dqalloc.
			
 
				- */
			
 
				-#define XFS_QMOPT_INHERIT	0x1000000
			
 
				-
			
 
				-/*
			
 
				- * flags to xfs_trans_mod_dquot.
			
 
				- */
			
 
				-#define XFS_TRANS_DQ_RES_BLKS	XFS_QMOPT_RES_REGBLKS
			
 
				-#define XFS_TRANS_DQ_RES_RTBLKS	XFS_QMOPT_RES_RTBLKS
			
 
				-#define XFS_TRANS_DQ_RES_INOS	XFS_QMOPT_RES_INOS
			
 
				-#define XFS_TRANS_DQ_BCOUNT	XFS_QMOPT_BCOUNT
			
 
				-#define XFS_TRANS_DQ_DELBCOUNT	XFS_QMOPT_DELBCOUNT
			
 
				-#define XFS_TRANS_DQ_ICOUNT	XFS_QMOPT_ICOUNT
			
 
				-#define XFS_TRANS_DQ_RTBCOUNT	XFS_QMOPT_RTBCOUNT
			
 
				-#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
			
 
				-
			
 
				-
			
 
				-#define XFS_QMOPT_QUOTALL	\
			
 
				-		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
			
 
				-#define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
			
 
				+struct xfs_trans;
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				 /*
			
 
				  * This check is done typically without holding the inode lock;
			
 
				  * that may seem racy, but it is harmless in the context that it is used.
			
@@ -301,13 +48,6 @@ typedef struct xfs_qoff_logformat {
 
				 	 (XFS_IS_PQUOTA_ON(mp) && \
			
 
				 		(mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0))
			
 
				 
			
 
				-#define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
			
 
				-				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
			
 
				-				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\
			
 
				-				 XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\
			
 
				-				 XFS_PQUOTA_CHKD)
			
 
				-
			
 
				-
			
 
				 /*
			
 
				  * The structure kept inside the xfs_trans_t keep track of dquot changes
			
 
				  * within a transaction and apply them later.
			
@@ -340,8 +80,9 @@ extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
 
				 		struct xfs_mount *, struct xfs_dquot *,
			
 
				 		struct xfs_dquot *, struct xfs_dquot *, long, long, uint);
			
 
				 
			
 
				-extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
			
 
				-		struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **);
			
 
				+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, xfs_dqid_t, xfs_dqid_t,
			
 
				+		prid_t, uint, struct xfs_dquot **, struct xfs_dquot **,
			
 
				+		struct xfs_dquot **);
			
 
				 extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
			
 
				 		struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *);
			
 
				 extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
			
@@ -362,9 +103,9 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 
				 
			
 
				 #else
			
 
				 static inline int
			
 
				-xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
			
 
				-		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp,
			
 
				-		struct xfs_dquot **pdqp)
			
 
				+xfs_qm_vop_dqalloc(struct xfs_inode *ip, xfs_dqid_t uid, xfs_dqid_t gid,
			
 
				+		prid_t prid, uint flags, struct xfs_dquot **udqp,
			
 
				+		struct xfs_dquot **gdqp, struct xfs_dquot **pdqp)
			
 
				 {
			
 
				 	*udqp = NULL;
			
 
				 	*gdqp = NULL;
			
@@ -415,5 +156,4 @@ extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
				 
			
 
				 extern const struct xfs_buf_ops xfs_dquot_buf_ops;
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				 #endif	/* __XFS_QUOTA_H__ */
			
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/xfs_quota_defs.h
@@ -0,0 +1,157 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef __XFS_QUOTA_DEFS_H__
			
 
				+#define __XFS_QUOTA_DEFS_H__
			
 
				+
			
 
				+/*
			
 
				+ * Quota definitions shared between user and kernel source trees.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Even though users may not have quota limits occupying all 64-bits,
			
 
				+ * they may need 64-bit accounting. Hence, 64-bit quota-counters,
			
 
				+ * and quota-limits. This is a waste in the common case, but hey ...
			
 
				+ */
			
 
				+typedef __uint64_t	xfs_qcnt_t;
			
 
				+typedef __uint16_t	xfs_qwarncnt_t;
			
 
				+
			
 
				+/*
			
 
				+ * flags for q_flags field in the dquot.
			
 
				+ */
			
 
				+#define XFS_DQ_USER		0x0001		/* a user quota */
			
 
				+#define XFS_DQ_PROJ		0x0002		/* project quota */
			
 
				+#define XFS_DQ_GROUP		0x0004		/* a group quota */
			
 
				+#define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
			
 
				+#define XFS_DQ_FREEING		0x0010		/* dquot is being torn down */
			
 
				+
			
 
				+#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
			
 
				+
			
 
				+#define XFS_DQ_FLAGS \
			
 
				+	{ XFS_DQ_USER,		"USER" }, \
			
 
				+	{ XFS_DQ_PROJ,		"PROJ" }, \
			
 
				+	{ XFS_DQ_GROUP,		"GROUP" }, \
			
 
				+	{ XFS_DQ_DIRTY,		"DIRTY" }, \
			
 
				+	{ XFS_DQ_FREEING,	"FREEING" }
			
 
				+
			
 
				+/*
			
 
				+ * We have the possibility of all three quota types being active at once, and
			
 
				+ * hence free space modification requires modification of all three current
			
 
				+ * dquots in a single transaction. For this case we need to have a reservation
			
 
				+ * of at least 3 dquots.
			
 
				+ *
			
 
				+ * However, a chown operation can change both UID and GID in a single
			
 
				+ * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be
			
 
				+ * modified. Hence for this case we need to reserve space for at least 4 dquots.
			
 
				+ *
			
 
				+ * And in the worst case, there's a rename operation that can be modifying up to
			
 
				+ * 4 inodes with dquots attached to them. In reality, the only inodes that can
			
 
				+ * have their dquots modified are the source and destination directory inodes
			
 
				+ * due to directory name creation and removal. That can require space allocation
			
 
				+ * and/or freeing on both directory inodes, and hence all three dquots on each
			
 
				+ * inode can be modified. And if the directories are world writeable, all the
			
 
				+ * dquots can be unique and so 6 dquots can be modified....
			
 
				+ *
			
 
				+ * And, of course, we also need to take into account the dquot log format item
			
 
				+ * used to describe each dquot.
			
 
				+ */
			
 
				+#define XFS_DQUOT_LOGRES(mp)	\
			
 
				+	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
			
 
				+
			
 
				+#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
			
 
				+#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
			
 
				+#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
			
 
				+#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
			
 
				+#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
			
 
				+#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
			
 
				+#define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)
			
 
				+
			
 
				+/*
			
 
				+ * Incore only flags for quotaoff - these bits get cleared when quota(s)
			
 
				+ * are in the process of getting turned off. These flags are in m_qflags but
			
 
				+ * never in sb_qflags.
			
 
				+ */
			
 
				+#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */
			
 
				+#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */
			
 
				+#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */
			
 
				+#define XFS_ALL_QUOTA_ACTIVE	\
			
 
				+	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
			
 
				+
			
 
				+/*
			
 
				+ * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
			
 
				+ * quota will not be switched off as long as that inode lock is held.
			
 
				+ */
			
 
				+#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
			
 
				+						   XFS_GQUOTA_ACTIVE | \
			
 
				+						   XFS_PQUOTA_ACTIVE))
			
 
				+#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
			
 
				+						   XFS_PQUOTA_ACTIVE))
			
 
				+#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
			
 
				+#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
			
 
				+#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
			
 
				+
			
 
				+/*
			
 
				+ * Flags to tell various functions what to do. Not all of these are meaningful
			
 
				+ * to a single function. None of these XFS_QMOPT_* flags are meant to have
			
 
				+ * persistent values (ie. their values can and will change between versions)
			
 
				+ */
			
 
				+#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
			
 
				+#define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
			
 
				+#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
			
 
				+#define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
			
 
				+#define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
			
 
				+#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
			
 
				+#define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
			
 
				+#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
			
 
				+#define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
			
 
				+
			
 
				+/*
			
 
				+ * flags to xfs_trans_mod_dquot to indicate which field needs to be
			
 
				+ * modified.
			
 
				+ */
			
 
				+#define XFS_QMOPT_RES_REGBLKS	0x0010000
			
 
				+#define XFS_QMOPT_RES_RTBLKS	0x0020000
			
 
				+#define XFS_QMOPT_BCOUNT	0x0040000
			
 
				+#define XFS_QMOPT_ICOUNT	0x0080000
			
 
				+#define XFS_QMOPT_RTBCOUNT	0x0100000
			
 
				+#define XFS_QMOPT_DELBCOUNT	0x0200000
			
 
				+#define XFS_QMOPT_DELRTBCOUNT	0x0400000
			
 
				+#define XFS_QMOPT_RES_INOS	0x0800000
			
 
				+
			
 
				+/*
			
 
				+ * flags for dqalloc.
			
 
				+ */
			
 
				+#define XFS_QMOPT_INHERIT	0x1000000
			
 
				+
			
 
				+/*
			
 
				+ * flags to xfs_trans_mod_dquot.
			
 
				+ */
			
 
				+#define XFS_TRANS_DQ_RES_BLKS	XFS_QMOPT_RES_REGBLKS
			
 
				+#define XFS_TRANS_DQ_RES_RTBLKS	XFS_QMOPT_RES_RTBLKS
			
 
				+#define XFS_TRANS_DQ_RES_INOS	XFS_QMOPT_RES_INOS
			
 
				+#define XFS_TRANS_DQ_BCOUNT	XFS_QMOPT_BCOUNT
			
 
				+#define XFS_TRANS_DQ_DELBCOUNT	XFS_QMOPT_DELBCOUNT
			
 
				+#define XFS_TRANS_DQ_ICOUNT	XFS_QMOPT_ICOUNT
			
 
				+#define XFS_TRANS_DQ_RTBCOUNT	XFS_QMOPT_RTBCOUNT
			
 
				+#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
			
 
				+
			
 
				+
			
 
				+#define XFS_QMOPT_QUOTALL	\
			
 
				+		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
			
 
				+#define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
			
 
				+
			
 
				+#endif	/* __XFS_QUOTA_DEFS_H__ */
			
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -16,8 +16,10 @@
 
				  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				  */
			
 
				 #include "xfs.h"
			
 
				-#include "xfs_sb.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_trans_resv.h"
			
 
				 #include "xfs_log.h"
			
 
				+#include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_quota.h"
			
@@ -53,6 +55,18 @@ xfs_fs_get_xstate(
 
				 	return -xfs_qm_scall_getqstat(mp, fqs);
			
 
				 }
			
 
				 
			
 
				+STATIC int
			
 
				+xfs_fs_get_xstatev(
			
 
				+	struct super_block	*sb,
			
 
				+	struct fs_quota_statv	*fqs)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = XFS_M(sb);
			
 
				+
			
 
				+	if (!XFS_IS_QUOTA_RUNNING(mp))
			
 
				+		return -ENOSYS;
			
 
				+	return -xfs_qm_scall_getqstatv(mp, fqs);
			
 
				+}
			
 
				+
			
 
				 STATIC int
			
 
				 xfs_fs_set_xstate(
			
 
				 	struct super_block	*sb,
			
@@ -133,6 +147,7 @@ xfs_fs_set_dqblk(
 
				 }
			
 
				 
			
 
				 const struct quotactl_ops xfs_quotactl_operations = {
			
 
				+	.get_xstatev		= xfs_fs_get_xstatev,
			
 
				 	.get_xstate		= xfs_fs_get_xstate,
			
 
				 	.set_xstate		= xfs_fs_set_xstate,
			
 
				 	.get_dqblk		= xfs_fs_get_dqblk,
			
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -1,346 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
			
 
				- * All Rights Reserved.
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or
			
 
				- * modify it under the terms of the GNU General Public License as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it would be useful,
			
 
				- * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				- * GNU General Public License for more details.
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public License
			
 
				- * along with this program; if not, write the Free Software Foundation,
			
 
				- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				- */
			
 
				-#include "xfs.h"
			
 
				-#include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				-#include "xfs_log.h"
			
 
				-#include "xfs_trans.h"
			
 
				-#include "xfs_sb.h"
			
 
				-#include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				-#include "xfs_mount.h"
			
 
				-#include "xfs_da_btree.h"
			
 
				-#include "xfs_bmap_btree.h"
			
 
				-#include "xfs_dinode.h"
			
 
				-#include "xfs_inode.h"
			
 
				-#include "xfs_inode_item.h"
			
 
				-#include "xfs_bmap.h"
			
 
				-#include "xfs_error.h"
			
 
				-#include "xfs_quota.h"
			
 
				-#include "xfs_utils.h"
			
 
				-#include "xfs_trans_space.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				-#include "xfs_trace.h"
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * Enter all inodes for a rename transaction into a sorted array.
			
 
				- */
			
 
				-STATIC void
			
 
				-xfs_sort_for_rename(
			
 
				-	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
			
 
				-	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
			
 
				-	xfs_inode_t	*ip1,	/* in: inode of old entry */
			
 
				-	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
			
 
				-				   already exists, NULL otherwise. */
			
 
				-	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
			
 
				-	int		*num_inodes)  /* out: number of inodes in array */
			
 
				-{
			
 
				-	xfs_inode_t		*temp;
			
 
				-	int			i, j;
			
 
				-
			
 
				-	/*
			
 
				-	 * i_tab contains a list of pointers to inodes.  We initialize
			
 
				-	 * the table here & we'll sort it.  We will then use it to
			
 
				-	 * order the acquisition of the inode locks.
			
 
				-	 *
			
 
				-	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
			
 
				-	 */
			
 
				-	i_tab[0] = dp1;
			
 
				-	i_tab[1] = dp2;
			
 
				-	i_tab[2] = ip1;
			
 
				-	if (ip2) {
			
 
				-		*num_inodes = 4;
			
 
				-		i_tab[3] = ip2;
			
 
				-	} else {
			
 
				-		*num_inodes = 3;
			
 
				-		i_tab[3] = NULL;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Sort the elements via bubble sort.  (Remember, there are at
			
 
				-	 * most 4 elements to sort, so this is adequate.)
			
 
				-	 */
			
 
				-	for (i = 0; i < *num_inodes; i++) {
			
 
				-		for (j = 1; j < *num_inodes; j++) {
			
 
				-			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
			
 
				-				temp = i_tab[j];
			
 
				-				i_tab[j] = i_tab[j-1];
			
 
				-				i_tab[j-1] = temp;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * xfs_rename
			
 
				- */
			
 
				-int
			
 
				-xfs_rename(
			
 
				-	xfs_inode_t	*src_dp,
			
 
				-	struct xfs_name	*src_name,
			
 
				-	xfs_inode_t	*src_ip,
			
 
				-	xfs_inode_t	*target_dp,
			
 
				-	struct xfs_name	*target_name,
			
 
				-	xfs_inode_t	*target_ip)
			
 
				-{
			
 
				-	xfs_trans_t	*tp = NULL;
			
 
				-	xfs_mount_t	*mp = src_dp->i_mount;
			
 
				-	int		new_parent;		/* moving to a new dir */
			
 
				-	int		src_is_directory;	/* src_name is a directory */
			
 
				-	int		error;
			
 
				-	xfs_bmap_free_t free_list;
			
 
				-	xfs_fsblock_t   first_block;
			
 
				-	int		cancel_flags;
			
 
				-	int		committed;
			
 
				-	xfs_inode_t	*inodes[4];
			
 
				-	int		spaceres;
			
 
				-	int		num_inodes;
			
 
				-
			
 
				-	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
			
 
				-
			
 
				-	new_parent = (src_dp != target_dp);
			
 
				-	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
			
 
				-
			
 
				-	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
			
 
				-				inodes, &num_inodes);
			
 
				-
			
 
				-	xfs_bmap_init(&free_list, &first_block);
			
 
				-	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
			
 
				-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				-	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
			
 
				-	error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
			
 
				-	if (error == ENOSPC) {
			
 
				-		spaceres = 0;
			
 
				-		error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0,
			
 
				-				XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
			
 
				-	}
			
 
				-	if (error) {
			
 
				-		xfs_trans_cancel(tp, 0);
			
 
				-		goto std_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Attach the dquots to the inodes
			
 
				-	 */
			
 
				-	error = xfs_qm_vop_rename_dqattach(inodes);
			
 
				-	if (error) {
			
 
				-		xfs_trans_cancel(tp, cancel_flags);
			
 
				-		goto std_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Lock all the participating inodes. Depending upon whether
			
 
				-	 * the target_name exists in the target directory, and
			
 
				-	 * whether the target directory is the same as the source
			
 
				-	 * directory, we can lock from 2 to 4 inodes.
			
 
				-	 */
			
 
				-	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * Join all the inodes to the transaction. From this point on,
			
 
				-	 * we can rely on either trans_commit or trans_cancel to unlock
			
 
				-	 * them.
			
 
				-	 */
			
 
				-	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
			
 
				-	if (new_parent)
			
 
				-		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
			
 
				-	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
			
 
				-	if (target_ip)
			
 
				-		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
			
 
				-
			
 
				-	/*
			
 
				-	 * If we are using project inheritance, we only allow renames
			
 
				-	 * into our tree when the project IDs are the same; else the
			
 
				-	 * tree quota mechanism would be circumvented.
			
 
				-	 */
			
 
				-	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
			
 
				-		     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
			
 
				-		error = XFS_ERROR(EXDEV);
			
 
				-		goto error_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Set up the target.
			
 
				-	 */
			
 
				-	if (target_ip == NULL) {
			
 
				-		/*
			
 
				-		 * If there's no space reservation, check the entry will
			
 
				-		 * fit before actually inserting it.
			
 
				-		 */
			
 
				-		error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
			
 
				-		if (error)
			
 
				-			goto error_return;
			
 
				-		/*
			
 
				-		 * If target does not exist and the rename crosses
			
 
				-		 * directories, adjust the target directory link count
			
 
				-		 * to account for the ".." reference from the new entry.
			
 
				-		 */
			
 
				-		error = xfs_dir_createname(tp, target_dp, target_name,
			
 
				-						src_ip->i_ino, &first_block,
			
 
				-						&free_list, spaceres);
			
 
				-		if (error == ENOSPC)
			
 
				-			goto error_return;
			
 
				-		if (error)
			
 
				-			goto abort_return;
			
 
				-
			
 
				-		xfs_trans_ichgtime(tp, target_dp,
			
 
				-					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				-
			
 
				-		if (new_parent && src_is_directory) {
			
 
				-			error = xfs_bumplink(tp, target_dp);
			
 
				-			if (error)
			
 
				-				goto abort_return;
			
 
				-		}
			
 
				-	} else { /* target_ip != NULL */
			
 
				-		/*
			
 
				-		 * If target exists and it's a directory, check that both
			
 
				-		 * target and source are directories and that target can be
			
 
				-		 * destroyed, or that neither is a directory.
			
 
				-		 */
			
 
				-		if (S_ISDIR(target_ip->i_d.di_mode)) {
			
 
				-			/*
			
 
				-			 * Make sure target dir is empty.
			
 
				-			 */
			
 
				-			if (!(xfs_dir_isempty(target_ip)) ||
			
 
				-			    (target_ip->i_d.di_nlink > 2)) {
			
 
				-				error = XFS_ERROR(EEXIST);
			
 
				-				goto error_return;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Link the source inode under the target name.
			
 
				-		 * If the source inode is a directory and we are moving
			
 
				-		 * it across directories, its ".." entry will be
			
 
				-		 * inconsistent until we replace that down below.
			
 
				-		 *
			
 
				-		 * In case there is already an entry with the same
			
 
				-		 * name at the destination directory, remove it first.
			
 
				-		 */
			
 
				-		error = xfs_dir_replace(tp, target_dp, target_name,
			
 
				-					src_ip->i_ino,
			
 
				-					&first_block, &free_list, spaceres);
			
 
				-		if (error)
			
 
				-			goto abort_return;
			
 
				-
			
 
				-		xfs_trans_ichgtime(tp, target_dp,
			
 
				-					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				-
			
 
				-		/*
			
 
				-		 * Decrement the link count on the target since the target
			
 
				-		 * dir no longer points to it.
			
 
				-		 */
			
 
				-		error = xfs_droplink(tp, target_ip);
			
 
				-		if (error)
			
 
				-			goto abort_return;
			
 
				-
			
 
				-		if (src_is_directory) {
			
 
				-			/*
			
 
				-			 * Drop the link from the old "." entry.
			
 
				-			 */
			
 
				-			error = xfs_droplink(tp, target_ip);
			
 
				-			if (error)
			
 
				-				goto abort_return;
			
 
				-		}
			
 
				-	} /* target_ip != NULL */
			
 
				-
			
 
				-	/*
			
 
				-	 * Remove the source.
			
 
				-	 */
			
 
				-	if (new_parent && src_is_directory) {
			
 
				-		/*
			
 
				-		 * Rewrite the ".." entry to point to the new
			
 
				-		 * directory.
			
 
				-		 */
			
 
				-		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
			
 
				-					target_dp->i_ino,
			
 
				-					&first_block, &free_list, spaceres);
			
 
				-		ASSERT(error != EEXIST);
			
 
				-		if (error)
			
 
				-			goto abort_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * We always want to hit the ctime on the source inode.
			
 
				-	 *
			
 
				-	 * This isn't strictly required by the standards since the source
			
 
				-	 * inode isn't really being changed, but old unix file systems did
			
 
				-	 * it and some incremental backup programs won't work without it.
			
 
				-	 */
			
 
				-	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
			
 
				-	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
			
 
				-
			
 
				-	/*
			
 
				-	 * Adjust the link count on src_dp.  This is necessary when
			
 
				-	 * renaming a directory, either within one parent when
			
 
				-	 * the target existed, or across two parent directories.
			
 
				-	 */
			
 
				-	if (src_is_directory && (new_parent || target_ip != NULL)) {
			
 
				-
			
 
				-		/*
			
 
				-		 * Decrement link count on src_directory since the
			
 
				-		 * entry that's moved no longer points to it.
			
 
				-		 */
			
 
				-		error = xfs_droplink(tp, src_dp);
			
 
				-		if (error)
			
 
				-			goto abort_return;
			
 
				-	}
			
 
				-
			
 
				-	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
			
 
				-					&first_block, &free_list, spaceres);
			
 
				-	if (error)
			
 
				-		goto abort_return;
			
 
				-
			
 
				-	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				-	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
			
 
				-	if (new_parent)
			
 
				-		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
			
 
				-
			
 
				-	/*
			
 
				-	 * If this is a synchronous mount, make sure that the
			
 
				-	 * rename transaction goes to disk before returning to
			
 
				-	 * the user.
			
 
				-	 */
			
 
				-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
			
 
				-		xfs_trans_set_sync(tp);
			
 
				-	}
			
 
				-
			
 
				-	error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				-	if (error) {
			
 
				-		xfs_bmap_cancel(&free_list);
			
 
				-		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
			
 
				-				 XFS_TRANS_ABORT));
			
 
				-		goto std_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * trans_commit will unlock src_ip, target_ip & decrement
			
 
				-	 * the vnode references.
			
 
				-	 */
			
 
				-	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				-
			
 
				- abort_return:
			
 
				-	cancel_flags |= XFS_TRANS_ABORT;
			
 
				- error_return:
			
 
				-	xfs_bmap_cancel(&free_list);
			
 
				-	xfs_trans_cancel(tp, cancel_flags);
			
 
				- std_return:
			
 
				-	return error;
			
 
				-}
			
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -17,25 +17,24 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_fsops.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				 #include "xfs_trans_space.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_buf.h"
			
 
				 #include "xfs_icache.h"
			
@@ -101,10 +100,9 @@ xfs_growfs_rt_alloc(
 
				 		/*
			
 
				 		 * Reserve space & log for one extent added to the file.
			
 
				 		 */
			
 
				-		if ((error = xfs_trans_reserve(tp, resblks,
			
 
				-				XFS_GROWRTALLOC_LOG_RES(mp), 0,
			
 
				-				XFS_TRANS_PERM_LOG_RES,
			
 
				-				XFS_DEFAULT_PERM_LOG_COUNT)))
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
			
 
				+					  resblks, 0);
			
 
				+		if (error)
			
 
				 			goto error_cancel;
			
 
				 		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				 		/*
			
@@ -147,8 +145,9 @@ xfs_growfs_rt_alloc(
 
				 			/*
			
 
				 			 * Reserve log for one block zeroing.
			
 
				 			 */
			
 
				-			if ((error = xfs_trans_reserve(tp, 0,
			
 
				-					XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0)))
			
 
				+			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
			
 
				+						  0, 0);
			
 
				+			if (error)
			
 
				 				goto error_cancel;
			
 
				 			/*
			
 
				 			 * Lock the bitmap inode.
			
@@ -736,8 +735,8 @@ xfs_rtallocate_range(
 
				 {
			
 
				 	xfs_rtblock_t	end;		/* end of the allocated extent */
			
 
				 	int		error;		/* error value */
			
 
				-	xfs_rtblock_t	postblock;	/* first block allocated > end */
			
 
				-	xfs_rtblock_t	preblock;	/* first block allocated < start */
			
 
				+	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */
			
 
				+	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */
			
 
				 
			
 
				 	end = start + len - 1;
			
 
				 	/*
			
@@ -1958,8 +1957,9 @@ xfs_growfs_rt(
 
				 		 * Start a transaction, get the log reservation.
			
 
				 		 */
			
 
				 		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
			
 
				-		if ((error = xfs_trans_reserve(tp, 0,
			
 
				-				XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree,
			
 
				+					  0, 0);
			
 
				+		if (error)
			
 
				 			goto error_cancel;
			
 
				 		/*
			
 
				 		 * Lock out other callers by grabbing the bitmap inode lock.
			
@@ -2148,7 +2148,7 @@ xfs_rtfree_extent(
 
				 	ASSERT(mp->m_rbmip->i_itemp != NULL);
			
 
				 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
			
 
				 
			
 
				-#if defined(__KERNEL__) && defined(DEBUG)
			
 
				+#ifdef DEBUG
			
 
				 	/*
			
 
				 	 * Check to see that this whole range is currently allocated.
			
 
				 	 */
			
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -18,58 +18,11 @@
 
				 #ifndef __XFS_RTALLOC_H__
			
 
				 #define	__XFS_RTALLOC_H__
			
 
				 
			
 
				+/* kernel only definitions and functions */
			
 
				+
			
 
				 struct xfs_mount;
			
 
				 struct xfs_trans;
			
 
				 
			
 
				-/* Min and max rt extent sizes, specified in bytes */
			
 
				-#define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
			
 
				-#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64kB */
			
 
				-#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4kB */
			
 
				-
			
 
				-/*
			
 
				- * Constants for bit manipulations.
			
 
				- */
			
 
				-#define	XFS_NBBYLOG	3		/* log2(NBBY) */
			
 
				-#define	XFS_WORDLOG	2		/* log2(sizeof(xfs_rtword_t)) */
			
 
				-#define	XFS_NBWORDLOG	(XFS_NBBYLOG + XFS_WORDLOG)
			
 
				-#define	XFS_NBWORD	(1 << XFS_NBWORDLOG)
			
 
				-#define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)
			
 
				-
			
 
				-#define	XFS_BLOCKSIZE(mp)	((mp)->m_sb.sb_blocksize)
			
 
				-#define	XFS_BLOCKMASK(mp)	((mp)->m_blockmask)
			
 
				-#define	XFS_BLOCKWSIZE(mp)	((mp)->m_blockwsize)
			
 
				-#define	XFS_BLOCKWMASK(mp)	((mp)->m_blockwmask)
			
 
				-
			
 
				-/*
			
 
				- * Summary and bit manipulation macros.
			
 
				- */
			
 
				-#define	XFS_SUMOFFS(mp,ls,bb)	((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
			
 
				-#define	XFS_SUMOFFSTOBLOCK(mp,s)	\
			
 
				-	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
			
 
				-#define	XFS_SUMPTR(mp,bp,so)	\
			
 
				-	((xfs_suminfo_t *)((bp)->b_addr + \
			
 
				-		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
			
 
				-
			
 
				-#define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
			
 
				-#define	XFS_BLOCKTOBIT(mp,bb)	((bb) << (mp)->m_blkbit_log)
			
 
				-#define	XFS_BITTOWORD(mp,bi)	\
			
 
				-	((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
			
 
				-
			
 
				-#define	XFS_RTMIN(a,b)	((a) < (b) ? (a) : (b))
			
 
				-#define	XFS_RTMAX(a,b)	((a) > (b) ? (a) : (b))
			
 
				-
			
 
				-#define	XFS_RTLOBIT(w)	xfs_lowbit32(w)
			
 
				-#define	XFS_RTHIBIT(w)	xfs_highbit32(w)
			
 
				-
			
 
				-#if XFS_BIG_BLKNOS
			
 
				-#define	XFS_RTBLOCKLOG(b)	xfs_highbit64(b)
			
 
				-#else
			
 
				-#define	XFS_RTBLOCKLOG(b)	xfs_highbit32(b)
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				-
			
 
				 #ifdef CONFIG_XFS_RT
			
 
				 /*
			
 
				  * Function prototypes for exported functions.
			
@@ -161,6 +114,4 @@ xfs_rtmount_init(
 
				 # define xfs_rtunmount_inodes(m)
			
 
				 #endif	/* CONFIG_XFS_RT */
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				 #endif	/* __XFS_RTALLOC_H__ */
			
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -0,0 +1,834 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_inum.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_trans_priv.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_alloc_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_btree.h"
			
 
				+#include "xfs_ialloc.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_rtalloc.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_quota.h"
			
 
				+#include "xfs_fsops.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+
			
 
				+/*
			
 
				+ * Physical superblock buffer manipulations. Shared with libxfs in userspace.
			
 
				+ */
			
 
				+
			
 
				+static const struct {
				+	short offset;	/* byte offset of the field within xfs_sb_t */
				+	short type;	/* 0 = integer (byte-swapped by xfs_sb_to_disk
				+			 *     unless the field is one byte wide)
				+			 * 1 = binary / string (no translation)
				+			 */
				+} xfs_sb_info[] = {	/* indexed by xfs_sb_field_t bit number */
				+	{ offsetof(xfs_sb_t, sb_magicnum),	0 },
				+	{ offsetof(xfs_sb_t, sb_blocksize),	0 },
				+	{ offsetof(xfs_sb_t, sb_dblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_rblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_rextents),	0 },
				+	{ offsetof(xfs_sb_t, sb_uuid),		1 },
				+	{ offsetof(xfs_sb_t, sb_logstart),	0 },
				+	{ offsetof(xfs_sb_t, sb_rootino),	0 },
				+	{ offsetof(xfs_sb_t, sb_rbmino),	0 },
				+	{ offsetof(xfs_sb_t, sb_rsumino),	0 },
				+	{ offsetof(xfs_sb_t, sb_rextsize),	0 },
				+	{ offsetof(xfs_sb_t, sb_agblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_agcount),	0 },
				+	{ offsetof(xfs_sb_t, sb_rbmblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_logblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_versionnum),	0 },
				+	{ offsetof(xfs_sb_t, sb_sectsize),	0 },
				+	{ offsetof(xfs_sb_t, sb_inodesize),	0 },
				+	{ offsetof(xfs_sb_t, sb_inopblock),	0 },
				+	{ offsetof(xfs_sb_t, sb_fname[0]),	1 },
				+	{ offsetof(xfs_sb_t, sb_blocklog),	0 },
				+	{ offsetof(xfs_sb_t, sb_sectlog),	0 },
				+	{ offsetof(xfs_sb_t, sb_inodelog),	0 },
				+	{ offsetof(xfs_sb_t, sb_inopblog),	0 },
				+	{ offsetof(xfs_sb_t, sb_agblklog),	0 },
				+	{ offsetof(xfs_sb_t, sb_rextslog),	0 },
				+	{ offsetof(xfs_sb_t, sb_inprogress),	0 },
				+	{ offsetof(xfs_sb_t, sb_imax_pct),	0 },
				+	{ offsetof(xfs_sb_t, sb_icount),	0 },
				+	{ offsetof(xfs_sb_t, sb_ifree),		0 },
				+	{ offsetof(xfs_sb_t, sb_fdblocks),	0 },
				+	{ offsetof(xfs_sb_t, sb_frextents),	0 },
				+	{ offsetof(xfs_sb_t, sb_uquotino),	0 },
				+	{ offsetof(xfs_sb_t, sb_gquotino),	0 },
				+	{ offsetof(xfs_sb_t, sb_qflags),	0 },
				+	{ offsetof(xfs_sb_t, sb_flags),		0 },
				+	{ offsetof(xfs_sb_t, sb_shared_vn),	0 },
				+	{ offsetof(xfs_sb_t, sb_inoalignmt),	0 },
				+	{ offsetof(xfs_sb_t, sb_unit),		0 },
				+	{ offsetof(xfs_sb_t, sb_width),		0 },
				+	{ offsetof(xfs_sb_t, sb_dirblklog),	0 },
				+	{ offsetof(xfs_sb_t, sb_logsectlog),	0 },
				+	{ offsetof(xfs_sb_t, sb_logsectsize),	0 },
				+	{ offsetof(xfs_sb_t, sb_logsunit),	0 },
				+	{ offsetof(xfs_sb_t, sb_features2),	0 },
				+	{ offsetof(xfs_sb_t, sb_bad_features2),	0 },
				+	{ offsetof(xfs_sb_t, sb_features_compat),	0 },
				+	{ offsetof(xfs_sb_t, sb_features_ro_compat),	0 },
				+	{ offsetof(xfs_sb_t, sb_features_incompat),	0 },
				+	{ offsetof(xfs_sb_t, sb_features_log_incompat),	0 },
				+	{ offsetof(xfs_sb_t, sb_crc),		0 },
				+	{ offsetof(xfs_sb_t, sb_pad),		0 },
				+	{ offsetof(xfs_sb_t, sb_pquotino),	0 },
				+	{ offsetof(xfs_sb_t, sb_lsn),		0 },
				+	{ sizeof(xfs_sb_t),			0 }	/* sentinel: each field's size is computed as [f + 1].offset - [f].offset */
				+};
			
 
				+
			
 
				+/*
				+ * Reference counting access wrappers to the perag structures.
				+ *
				+ * Per-ag structures are never freed, so the only thing a lookup has to
				+ * be protected against is a change to the tree structure itself; that
				+ * is why the lookups run inside an RCU read-side section.
				+ *
				+ * xfs_perag_get() looks up the perag for @agno in mp->m_perag_tree and,
				+ * when one exists, bumps its pag_ref. It returns the perag, or NULL when
				+ * the tree holds no entry for @agno.
				+ */
				+struct xfs_perag *
				+xfs_perag_get(
				+	struct xfs_mount	*mp,
				+	xfs_agnumber_t		agno)
				+{
				+	struct xfs_perag	*perag;
				+	int			refcount = 0;
				+
				+	rcu_read_lock();
				+	perag = radix_tree_lookup(&mp->m_perag_tree, agno);
				+	if (perag != NULL) {
				+		ASSERT(atomic_read(&perag->pag_ref) >= 0);
				+		refcount = atomic_inc_return(&perag->pag_ref);
				+	}
				+	rcu_read_unlock();
				+
				+	/* A miss is traced too; the count is then reported as 0. */
				+	trace_xfs_perag_get(mp, agno, refcount, _RET_IP_);
				+	return perag;
				+}
			
 
				+
			
 
				+/*
				+ * Search from @first for the next perag that has @tag set in the perag
				+ * radix tree and take a reference on it.
				+ *
				+ * Returns the referenced perag, or NULL (without emitting a trace
				+ * event) when the gang lookup finds nothing.
				+ */
				+struct xfs_perag *
				+xfs_perag_get_tag(
				+	struct xfs_mount	*mp,
				+	xfs_agnumber_t		first,
				+	int			tag)
				+{
				+	struct xfs_perag	*perag;
				+	int			nr_found;
				+	int			refcount;
				+
				+	rcu_read_lock();
				+	nr_found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
				+					(void **)&perag, first, 1, tag);
				+	if (nr_found > 0) {
				+		/* take the reference before leaving the RCU section */
				+		refcount = atomic_inc_return(&perag->pag_ref);
				+		rcu_read_unlock();
				+		trace_xfs_perag_get_tag(mp, perag->pag_agno, refcount,
				+					_RET_IP_);
				+		return perag;
				+	}
				+	rcu_read_unlock();
				+	return NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Drop one reference on @pag and trace the resulting count. The perag
				+ * is only un-referenced here; nothing is freed.
				+ */
				+void
				+xfs_perag_put(
				+	struct xfs_perag	*pag)
				+{
				+	int	remaining;
				+
				+	ASSERT(atomic_read(&pag->pag_ref) > 0);
				+	remaining = atomic_dec_return(&pag->pag_ref);
				+	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, remaining,
				+			    _RET_IP_);
				+}
			
 
				+
			
 
				+/*
				+ * Check the validity of the SB found.
				+ *
				+ * @mp:			mount the superblock belongs to; supplies the
				+ *			message context, the XFS_MOUNT_RDONLY flag and
				+ *			the log/data device targets compared below.
				+ * @sbp:		in-core (CPU-endian) superblock to examine.
				+ * @check_inprogress:	when true, a non-zero sb_inprogress is an error.
				+ * @check_version:	when true and the superblock is version 5, the
				+ *			feature masks are validated as well.
				+ *
				+ * Returns 0 when every check passes. Otherwise returns XFS_ERROR() of
				+ * EWRONGFS, EINVAL, EFSCORRUPTED, ENOSYS or EFBIG, depending on which
				+ * check rejected the superblock.
				+ */
				+STATIC int
				+xfs_mount_validate_sb(
				+	xfs_mount_t	*mp,
				+	xfs_sb_t	*sbp,
				+	bool		check_inprogress,
				+	bool		check_version)
				+{
				+
				+	/*
				+	 * Identification comes first: the magic number and version must be
				+	 * acceptable before any other field is looked at.
				+	 *
				+	 * NOTE(review): the remainder of this comment describes the
				+	 * sb_logstart checks further down rather than the magic number
				+	 * check it sits above.  If the log device and data device have the
				+	 * same device number, the log is internal.  Consequently, the
				+	 * sb_logstart should be non-zero.  If we have a zero sb_logstart
				+	 * in this case, we may be trying to mount a volume filesystem in a
				+	 * non-volume manner.
				+	 */
				+	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
				+		xfs_warn(mp, "bad magic number");
				+		return XFS_ERROR(EWRONGFS);
				+	}
				+
				+
				+	if (!xfs_sb_good_version(sbp)) {
				+		xfs_warn(mp, "bad version");
				+		return XFS_ERROR(EWRONGFS);
				+	}
				+
				+	/*
				+	 * Version 5 superblock feature mask validation. Reject combinations the
				+	 * kernel cannot support up front before checking anything else. For
				+	 * write validation, we don't need to check feature masks.
				+	 */
				+	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
				+		xfs_alert(mp,
				+"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
				+"Use of these features in this kernel is at your own risk!");
				+
				+		if (xfs_sb_has_compat_feature(sbp,
				+					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
				+			xfs_warn(mp,
				+"Superblock has unknown compatible features (0x%x) enabled.\n"
				+"Using a more recent kernel is recommended.",
				+				(sbp->sb_features_compat &
				+						XFS_SB_FEAT_COMPAT_UNKNOWN));
				+		}
				+
				+		if (xfs_sb_has_ro_compat_feature(sbp,
				+					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
				+			xfs_alert(mp,
				+"Superblock has unknown read-only compatible features (0x%x) enabled.",
				+				(sbp->sb_features_ro_compat &
				+						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
				+			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
				+				xfs_warn(mp,
				+"Attempted to mount read-only compatible filesystem read-write.\n"
				+"Filesystem can only be safely mounted read only.");
				+				return XFS_ERROR(EINVAL);
				+			}
				+		}
				+		if (xfs_sb_has_incompat_feature(sbp,
				+					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
				+			xfs_warn(mp,
				+"Superblock has unknown incompatible features (0x%x) enabled.\n"
				+"Filesystem can not be safely mounted by this kernel.",
				+				(sbp->sb_features_incompat &
				+						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
				+			return XFS_ERROR(EINVAL);
				+		}
				+	}
				+
				+	if (xfs_sb_version_has_pquotino(sbp)) {
				+		if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
				+			xfs_notice(mp,
				+			   "Version 5 of Super block has XFS_OQUOTA bits.\n");
				+			return XFS_ERROR(EFSCORRUPTED);
				+		}
				+	} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
				+				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
				+			xfs_notice(mp,
				+"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n");
				+			return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	if (unlikely(
				+	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
				+		xfs_warn(mp,
				+		"filesystem is marked as having an external log; "
				+		"specify logdev on the mount command line.");
				+		return XFS_ERROR(EINVAL);
				+	}
				+
				+	if (unlikely(
				+	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
				+		xfs_warn(mp,
				+		"filesystem is marked as having an internal log; "
				+		"do not specify logdev on the mount command line.");
				+		return XFS_ERROR(EINVAL);
				+	}
				+
				+	/*
				+	 * More sanity checking.  Most of these were stolen directly from
				+	 * xfs_repair.
				+	 */
				+	if (unlikely(
				+	    sbp->sb_agcount <= 0					||
				+	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
				+	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
				+	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
				+	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
				+	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
				+	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
				+	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
				+	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
				+	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
				+	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
				+	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
				+	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
				+	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
				+	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
				+	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
				+	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
				+	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
				+	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
				+	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
				+	    sbp->sb_dblocks == 0					||
				+	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
				+	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
				+		XFS_CORRUPTION_ERROR("SB sanity check failed",
				+				XFS_ERRLEVEL_LOW, mp, sbp);
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	/*
				+	 * Until this is fixed only page-sized or smaller data blocks work.
				+	 */
				+	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
				+		xfs_warn(mp,
				+		"File system with blocksize %d bytes. "
				+		"Only pagesize (%ld) or less will currently work.",
				+				sbp->sb_blocksize, PAGE_SIZE);
				+		return XFS_ERROR(ENOSYS);
				+	}
				+
				+	/*
				+	 * Currently only very few inode sizes are supported.
				+	 */
				+	switch (sbp->sb_inodesize) {
				+	case 256:
				+	case 512:
				+	case 1024:
				+	case 2048:
				+		break;
				+	default:
				+		xfs_warn(mp, "inode size of %d bytes not supported",
				+				sbp->sb_inodesize);
				+		return XFS_ERROR(ENOSYS);
				+	}
				+
				+	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
				+	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
				+		xfs_warn(mp,
				+		"file system too large to be mounted on this system.");
				+		return XFS_ERROR(EFBIG);
				+	}
				+
				+	if (check_inprogress && sbp->sb_inprogress) {
				+		xfs_warn(mp, "Offline file system operation in progress!");
				+		return XFS_ERROR(EFSCORRUPTED);
				+	}
				+
				+	/*
				+	 * Version 1 directory format has never worked on Linux.
				+	 */
				+	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
				+		xfs_warn(mp, "file system using version 1 directory format");
				+		return XFS_ERROR(ENOSYS);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Normalise the quota fields of an in-core superblock after it has been
				+ * converted from disk: unset quota inodes become NULLFSINO and, for
				+ * superblocks without pquotino, the XFS_OQUOTA_* flags and the shared
				+ * sb_gquotino slot are translated to their group/project equivalents.
				+ */
				+void
				+xfs_sb_quota_from_disk(struct xfs_sb *sbp)
				+{
				+	/*
				+	 * Older mkfs doesn't initialize quota inodes to NULLFSINO, so an
				+	 * invalid quota inode can show up in core as either 0 or NULLFSINO.
				+	 * Collapse both to the single value NULLFSINO.
				+	 *
				+	 * Only the in-core values change here. They are not written back
				+	 * to disk unless some quota information is written, and even then
				+	 * sb_pquotino is not written unless the superblock supports
				+	 * pquotino.
				+	 */
				+	if (!sbp->sb_uquotino)
				+		sbp->sb_uquotino = NULLFSINO;
				+	if (!sbp->sb_gquotino)
				+		sbp->sb_gquotino = NULLFSINO;
				+	if (!sbp->sb_pquotino)
				+		sbp->sb_pquotino = NULLFSINO;
				+
				+	/*
				+	 * The remaining manipulations are needed only for an older
				+	 * version of the on-disk superblock.
				+	 */
				+	if (xfs_sb_version_has_pquotino(sbp))
				+		return;
				+
				+	/* XFS_OQUOTA_* applies to whichever of project/group is accounted */
				+	if (sbp->sb_qflags & XFS_OQUOTA_ENFD) {
				+		if (sbp->sb_qflags & XFS_PQUOTA_ACCT)
				+			sbp->sb_qflags |= XFS_PQUOTA_ENFD;
				+		else
				+			sbp->sb_qflags |= XFS_GQUOTA_ENFD;
				+	}
				+	if (sbp->sb_qflags & XFS_OQUOTA_CHKD) {
				+		if (sbp->sb_qflags & XFS_PQUOTA_ACCT)
				+			sbp->sb_qflags |= XFS_PQUOTA_CHKD;
				+		else
				+			sbp->sb_qflags |= XFS_GQUOTA_CHKD;
				+	}
				+	sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
				+
				+	if (!(sbp->sb_qflags & XFS_PQUOTA_ACCT))
				+		return;
				+
				+	/*
				+	 * An older on-disk superblock only has sb_gquotino, while the
				+	 * in-core one has both sb_gquotino and sb_pquotino, of which only
				+	 * one is in use at any time. With PQUOTA set on disk the value
				+	 * read into sb_gquotino really is the project quota inode, so
				+	 * move it across.
				+	 */
				+	sbp->sb_pquotino = sbp->sb_gquotino;
				+	sbp->sb_gquotino = NULLFSINO;
				+}
			
 
				+
			
 
				+/*
				+ * Convert the on-disk superblock @from into the in-core superblock @to.
				+ *
				+ * Multi-byte integer fields are converted with be{16,32,64}_to_cpu();
				+ * single-byte fields are assigned directly and sb_uuid/sb_fname are
				+ * copied as raw bytes. sb_crc is not assigned here and sb_pad is always
				+ * set to zero. No validation is performed.
				+ */
				+void
				+xfs_sb_from_disk(
				+	struct xfs_sb	*to,
				+	xfs_dsb_t	*from)
				+{
				+	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
				+	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
				+	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
				+	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
				+	to->sb_rextents = be64_to_cpu(from->sb_rextents);
				+	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));	/* raw bytes, no swap */
				+	to->sb_logstart = be64_to_cpu(from->sb_logstart);
				+	to->sb_rootino = be64_to_cpu(from->sb_rootino);
				+	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
				+	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
				+	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
				+	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
				+	to->sb_agcount = be32_to_cpu(from->sb_agcount);
				+	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
				+	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
				+	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
				+	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
				+	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
				+	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
				+	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));	/* raw bytes, no swap */
				+	to->sb_blocklog = from->sb_blocklog;
				+	to->sb_sectlog = from->sb_sectlog;
				+	to->sb_inodelog = from->sb_inodelog;
				+	to->sb_inopblog = from->sb_inopblog;
				+	to->sb_agblklog = from->sb_agblklog;
				+	to->sb_rextslog = from->sb_rextslog;
				+	to->sb_inprogress = from->sb_inprogress;
				+	to->sb_imax_pct = from->sb_imax_pct;
				+	to->sb_icount = be64_to_cpu(from->sb_icount);
				+	to->sb_ifree = be64_to_cpu(from->sb_ifree);
				+	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
				+	to->sb_frextents = be64_to_cpu(from->sb_frextents);
				+	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
				+	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
				+	to->sb_qflags = be16_to_cpu(from->sb_qflags);
				+	to->sb_flags = from->sb_flags;
				+	to->sb_shared_vn = from->sb_shared_vn;
				+	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
				+	to->sb_unit = be32_to_cpu(from->sb_unit);
				+	to->sb_width = be32_to_cpu(from->sb_width);
				+	to->sb_dirblklog = from->sb_dirblklog;
				+	to->sb_logsectlog = from->sb_logsectlog;
				+	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
				+	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
				+	to->sb_features2 = be32_to_cpu(from->sb_features2);
				+	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
				+	to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
				+	to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
				+	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
				+	to->sb_features_log_incompat =
				+				be32_to_cpu(from->sb_features_log_incompat);
				+	to->sb_pad = 0;	/* not taken from disk */
				+	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
				+	to->sb_lsn = be64_to_cpu(from->sb_lsn);
				+}
			
 
				+
			
 
				+/*
				+ * Write the quota flags and quota inode numbers selected in *@fields
				+ * into the on-disk superblock @to, using the layout of superblocks that
				+ * have no separate sb_pquotino.
				+ *
				+ * XFS_SB_QFLAGS (when it was handled), XFS_SB_GQUOTINO and
				+ * XFS_SB_PQUOTINO are cleared from *@fields on the way out. Superblocks
				+ * that do have pquotino are left completely untouched, *@fields
				+ * included.
				+ */
				+static inline void
				+xfs_sb_quota_to_disk(
				+	xfs_dsb_t	*to,
				+	xfs_sb_t	*from,
				+	__int64_t	*fields)
				+{
				+	__uint16_t	disk_qflags;
				+
				+	/*
				+	 * These manipulations are needed only for an older version of
				+	 * the on-disk superblock.
				+	 */
				+	if (xfs_sb_version_has_pquotino(from))
				+		return;
				+
				+	if (*fields & XFS_SB_QFLAGS) {
				+		/*
				+		 * The in-core sb_qflags never carries XFS_OQUOTA_* flags,
				+		 * whereas the on-disk version does. Strip the in-core
				+		 * XFS_{P,G}QUOTA_{ENFD,CHKD} flags and express them as the
				+		 * on-disk XFS_OQUOTA_* flags instead.
				+		 */
				+		disk_qflags = from->sb_qflags &
				+				~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
				+				  XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
				+
				+		if (from->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
				+			disk_qflags |= XFS_OQUOTA_ENFD;
				+		if (from->sb_qflags & (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
				+			disk_qflags |= XFS_OQUOTA_CHKD;
				+
				+		to->sb_qflags = cpu_to_be16(disk_qflags);
				+		*fields &= ~XFS_SB_QFLAGS;
				+	}
				+
				+	/*
				+	 * GQUOTINO and PQUOTINO cannot be used together in versions of
				+	 * the superblock that do not have pquotino. from->sb_qflags tells
				+	 * us which quota is active; that inode number is the one stored
				+	 * in the single on-disk sb_gquotino slot.
				+	 */
				+	if ((*fields & XFS_SB_GQUOTINO) &&
				+	    (from->sb_qflags & XFS_GQUOTA_ACCT))
				+		to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
				+	else if ((*fields & XFS_SB_PQUOTINO) &&
				+		 (from->sb_qflags & XFS_PQUOTA_ACCT))
				+		to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
				+
				+	*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
				+}
			
 
				+
			
 
				+/*
				+ * Copy in core superblock to ondisk one.
				+ *
				+ * @fields is a mask of superblock fields to copy; bit f selects the
				+ * field described by xfs_sb_info[f]. Quota fields are handled first by
				+ * xfs_sb_quota_to_disk(), which may remove them from the mask. Every
				+ * remaining field is either copied verbatim (single bytes and
				+ * binary/string fields) or converted to big-endian according to its
				+ * width.
				+ */
				+void
				+xfs_sb_to_disk(
				+	xfs_dsb_t	*to,
				+	xfs_sb_t	*from,
				+	__int64_t	fields)
				+{
				+	xfs_caddr_t	dst = (xfs_caddr_t)to;
				+	xfs_caddr_t	src = (xfs_caddr_t)from;
				+	xfs_sb_field_t	bit;
				+	int		off;
				+	int		len;
				+
				+	ASSERT(fields);
				+	if (!fields)
				+		return;
				+
				+	xfs_sb_quota_to_disk(to, from, &fields);
				+
				+	/* peel off the lowest set bit on every iteration */
				+	for (; fields; fields &= ~(1LL << bit)) {
				+		bit = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				+		off = xfs_sb_info[bit].offset;
				+		/* a field ends where the next table entry begins */
				+		len = xfs_sb_info[bit + 1].offset - off;
				+
				+		ASSERT(xfs_sb_info[bit].type == 0 ||
				+		       xfs_sb_info[bit].type == 1);
				+
				+		if (len == 1 || xfs_sb_info[bit].type == 1) {
				+			memcpy(dst + off, src + off, len);
				+			continue;
				+		}
				+
				+		switch (len) {
				+		case 2:
				+			*(__be16 *)(dst + off) =
				+				cpu_to_be16(*(__u16 *)(src + off));
				+			break;
				+		case 4:
				+			*(__be32 *)(dst + off) =
				+				cpu_to_be32(*(__u32 *)(src + off));
				+			break;
				+		case 8:
				+			*(__be64 *)(dst + off) =
				+				cpu_to_be64(*(__u64 *)(src + off));
				+			break;
				+		default:
				+			ASSERT(0);
				+		}
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Convert the superblock held in @bp to in-core form on the stack and
				+ * run xfs_mount_validate_sb() on it. @check_version is passed through
				+ * unchanged. Returns whatever the validation returns.
				+ */
				+static int
				+xfs_sb_verify(
				+	struct xfs_buf	*bp,
				+	bool		check_version)
				+{
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
				+	struct xfs_sb	incore;
				+	bool		is_primary;
				+
				+	xfs_sb_from_disk(&incore, XFS_BUF_TO_SBP(bp));
				+
				+	/*
				+	 * Only check the in progress field for the primary superblock as
				+	 * mkfs.xfs doesn't clear it from secondary superblocks.
				+	 */
				+	is_primary = (bp->b_bn == XFS_SB_DADDR);
				+
				+	return xfs_mount_validate_sb(mp, &incore, is_primary, check_version);
				+}
			
 
				+
			
 
				+/*
				+ * Read verifier for superblock buffers.
				+ *
				+ * If the superblock has the CRC feature bit set or the CRC field is
				+ * non-null, check that the CRC is valid.  The CRC field is checked for
				+ * being non-null because a single bit error could clear the feature
				+ * bit, and unused parts of the superblock are supposed to be zero.
				+ * Hence a non-null crc field indicates that we've potentially lost a
				+ * feature bit and we should check it anyway.
				+ *
				+ * A CRC failure is reported as EFSCORRUPTED; otherwise the result of
				+ * xfs_sb_verify() decides. Any error is logged and recorded on the
				+ * buffer with xfs_buf_ioerror().
				+ *
				+ * NOTE(review): the CRC length is taken from the on-disk sb_sectsize,
				+ * which has not been validated at this point, whereas
				+ * xfs_sb_write_verify() uses BBTOB(bp->b_length) - confirm the buffer
				+ * always covers sb_sectsize bytes.
				+ */
				+static void
				+xfs_sb_read_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
				+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
				+	bool		want_crc;
				+	int		error;
				+
				+	/*
				+	 * open code the version check to avoid needing to convert the entire
				+	 * superblock from disk order just to check the version number
				+	 */
				+	want_crc = dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
				+		   (((be16_to_cpu(dsb->sb_versionnum) &
				+		      XFS_SB_VERSION_NUMBITS) == XFS_SB_VERSION_5) ||
				+		    dsb->sb_crc != 0);
				+
				+	if (want_crc &&
				+	    !xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
				+			      offsetof(struct xfs_sb, sb_crc)))
				+		error = EFSCORRUPTED;
				+	else
				+		error = xfs_sb_verify(bp, true);
				+
				+	if (!error)
				+		return;
				+
				+	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
				+	xfs_buf_ioerror(bp, error);
				+}
			
 
				+
			
 
				+/*
				+ * We may be probed for a filesystem match, so we may not want to emit
				+ * messages when the superblock buffer is not actually an XFS superblock.
				+ * A buffer without the XFS magic number is therefore failed with
				+ * EWRONGFS and no message; a buffer that does carry it goes through the
				+ * normal, noisy xfs_sb_read_verify() because we are really going to
				+ * mount it and want to know about errors.
				+ */
				+static void
				+xfs_sb_quiet_read_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
				+
				+	if (dsb->sb_magicnum != cpu_to_be32(XFS_SB_MAGIC)) {
				+		/* quietly fail */
				+		xfs_buf_ioerror(bp, EWRONGFS);
				+		return;
				+	}
				+
				+	/* XFS filesystem, verify noisily! */
				+	xfs_sb_read_verify(bp);
				+}
			
 
				+
			
 
				+/*
				+ * Write verifier for superblock buffers.
				+ *
				+ * The superblock is validated without the feature mask checks; a
				+ * failure is logged and recorded on the buffer. When the mounted
				+ * filesystem has CRCs enabled, the LSN of the attached buffer log item
				+ * (if there is one) is stamped into sb_lsn and the checksum over the
				+ * whole buffer is recomputed.
				+ */
				+static void
				+xfs_sb_write_verify(
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
				+	int			error;
				+
				+	error = xfs_sb_verify(bp, false);
				+	if (error) {
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
				+				     mp, bp->b_addr);
				+		xfs_buf_ioerror(bp, error);
				+		return;
				+	}
				+
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
				+		if (bip)
				+			XFS_BUF_TO_SBP(bp)->sb_lsn =
				+					cpu_to_be64(bip->bli_item.li_lsn);
				+
				+		xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
				+				 offsetof(struct xfs_sb, sb_crc));
				+	}
				+}
			
 
				+
			
 
				+const struct xfs_buf_ops xfs_sb_buf_ops = {
				+	.verify_read = xfs_sb_read_verify,	/* logs every verification failure */
				+	.verify_write = xfs_sb_write_verify,
				+};
				+
				+const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
				+	.verify_read = xfs_sb_quiet_read_verify,	/* silent EWRONGFS when the magic number is not XFS */
				+	.verify_write = xfs_sb_write_verify,	/* same write verifier as xfs_sb_buf_ops */
				+};
			
 
				+
			
 
				+/*
				+ * xfs_sb_mount_common
				+ *
				+ * Mount initialization code establishing various mount
				+ * fields from the superblock associated with the given
				+ * mount structure.
				+ *
				+ * @mp:  mount structure whose derived fields are filled in.
				+ * @sbp: superblock the values are computed from.
				+ *
				+ * Sets the AG rotors, the log2 shift values, the block masks, the
				+ * max/min record counts for the alloc, inode and bmap btrees, and the
				+ * inode allocation chunk sizes. Min record counts are half the max.
				+ *
				+ * NOTE(review): m_maxagi is read from mp->m_sb.sb_agcount while every
				+ * other value comes from @sbp - confirm callers always pass &mp->m_sb.
				+ */
				+void
				+xfs_sb_mount_common(
				+	struct xfs_mount *mp,
				+	struct xfs_sb	*sbp)
				+{
				+	mp->m_agfrotor = mp->m_agirotor = 0;
				+	spin_lock_init(&mp->m_agirotor_lock);
				+	mp->m_maxagi = mp->m_sb.sb_agcount;
				+	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
				+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
				+	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
				+	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
				+	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
				+	mp->m_blockmask = sbp->sb_blocksize - 1;
				+	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
				+	mp->m_blockwmask = mp->m_blockwsize - 1;
				+
				+	mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
				+	mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
				+	mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
				+	mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
				+
				+	mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
				+	mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
				+	mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
				+	mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
				+
				+	mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
				+	mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
				+	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
				+	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
				+
				+	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
				+	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
				+					sbp->sb_inopblock);
				+	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
				+}
			
 
				+
			
 
				+/*
				+ * xfs_initialize_perag_data
				+ *
				+ * Read in each per-ag structure so we can count up the number of
				+ * allocated inodes, free inodes and used filesystem blocks as this
				+ * information is no longer persistent in the superblock. Once we have
				+ * this information, write it into the in-core superblock structure.
				+ *
				+ * Walks AGs 0 .. @agcount - 1. Returns 0 on success, or the first error
				+ * returned by xfs_alloc_pagf_init()/xfs_ialloc_pagi_init(), in which
				+ * case the superblock counters are left untouched.
				+ */
				+int
				+xfs_initialize_perag_data(
				+	struct xfs_mount *mp,
				+	xfs_agnumber_t	agcount)
				+{
				+	xfs_sb_t	*sb = &mp->m_sb;
				+	xfs_perag_t	*perag;
				+	xfs_agnumber_t	agno;
				+	uint64_t	inodes_free = 0;
				+	uint64_t	inodes_alloc = 0;
				+	uint64_t	blocks_free = 0;
				+	uint64_t	blocks_freelist = 0;
				+	uint64_t	blocks_btree = 0;
				+	int		error;
				+
				+	for (agno = 0; agno < agcount; agno++) {
				+		/*
				+		 * Read the agf first and then the agi. Between them they
				+		 * give us everything we need and populate the per-ag
				+		 * structure as a side effect.
				+		 */
				+		error = xfs_alloc_pagf_init(mp, NULL, agno, 0);
				+		if (!error)
				+			error = xfs_ialloc_pagi_init(mp, NULL, agno);
				+		if (error)
				+			return error;
				+
				+		perag = xfs_perag_get(mp, agno);
				+		inodes_free += perag->pagi_freecount;
				+		inodes_alloc += perag->pagi_count;
				+		blocks_free += perag->pagf_freeblks;
				+		blocks_freelist += perag->pagf_flcount;
				+		blocks_btree += perag->pagf_btreeblks;
				+		xfs_perag_put(perag);
				+	}
				+
				+	/* Overwrite incore superblock counters with just-read data */
				+	spin_lock(&mp->m_sb_lock);
				+	sb->sb_ifree = inodes_free;
				+	sb->sb_icount = inodes_alloc;
				+	sb->sb_fdblocks = blocks_free + blocks_freelist + blocks_btree;
				+	spin_unlock(&mp->m_sb_lock);
				+
				+	/* Fixup the per-cpu counters as well. */
				+	xfs_icsb_reinit_counters(mp);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * xfs_mod_sb() can be used to copy arbitrary changes to the
				+ * in-core superblock into the superblock buffer to be logged.
				+ * It does not provide the higher level of locking that is
				+ * needed to protect the in-core superblock from concurrent
				+ * access.
				+ *
				+ * @fields is the mask of superblock fields to copy. The range logged
				+ * in the buffer runs from the start of the lowest selected field to
				+ * the last byte of the highest one, so unselected fields that lie in
				+ * between are covered by the logged range as well.
				+ */
				+void
				+xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
				+{
				+	xfs_mount_t	*mp;
				+	xfs_buf_t	*sb_bp;
				+	xfs_sb_field_t	lo_field;
				+	xfs_sb_field_t	hi_field;
				+
				+	ASSERT(fields);
				+	if (!fields)
				+		return;
				+
				+	mp = tp->t_mountp;
				+	sb_bp = xfs_trans_getsb(tp, mp, 0);
				+
				+	/* translate/copy */
				+	xfs_sb_to_disk(XFS_BUF_TO_SBP(sb_bp), &mp->m_sb, fields);
				+
				+	/* find modified range */
				+	hi_field = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
				+	ASSERT((1LL << hi_field) & XFS_SB_MOD_BITS);
				+
				+	lo_field = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
				+	ASSERT((1LL << lo_field) & XFS_SB_MOD_BITS);
				+
				+	xfs_trans_buf_set_type(tp, sb_bp, XFS_BLFT_SB_BUF);
				+	xfs_trans_log_buf(tp, sb_bp, xfs_sb_info[lo_field].offset,
				+			  xfs_sb_info[hi_field + 1].offset - 1);
				+}
			
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -26,6 +26,7 @@
 
				 
			
 
				 struct xfs_buf;
			
 
				 struct xfs_mount;
			
 
				+struct xfs_trans;
			
 
				 
			
 
				 #define	XFS_SB_MAGIC		0x58465342	/* 'XFSB' */
			
 
				 #define	XFS_SB_VERSION_1	1		/* 5.3, 6.0.1, 6.1 */
			
@@ -83,11 +84,13 @@ struct xfs_mount;
 
				 #define XFS_SB_VERSION2_PARENTBIT	0x00000010	/* parent pointers */
			
 
				 #define XFS_SB_VERSION2_PROJID32BIT	0x00000080	/* 32 bit project id */
			
 
				 #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
			
 
				+#define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */
			
 
				 
			
 
				 #define	XFS_SB_VERSION2_OKREALFBITS	\
			
 
				 	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
			
 
				 	 XFS_SB_VERSION2_ATTR2BIT	| \
			
 
				-	 XFS_SB_VERSION2_PROJID32BIT)
			
 
				+	 XFS_SB_VERSION2_PROJID32BIT	| \
			
 
				+	 XFS_SB_VERSION2_FTYPE)
			
 
				 #define	XFS_SB_VERSION2_OKSASHFBITS	\
			
 
				 	(0)
			
 
				 #define XFS_SB_VERSION2_OKREALBITS	\
			
@@ -354,15 +357,8 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 
				 		     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
			
 
				 			return 0;
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				 		if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
			
 
				 			return 0;
			
 
				-#else
			
 
				-		if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) &&
			
 
				-		    sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
			
 
				-			return 0;
			
 
				-#endif
			
 
				-
			
 
				 		return 1;
			
 
				 	}
			
 
				 	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
			
@@ -554,12 +550,13 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
 
				 		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
			
 
				 }
			
 
				 
			
 
				-static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
			
 
				+static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
			
 
				+	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
			
 
				+	sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
			
 
				+	sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 /*
			
 
				  * Extended v5 superblock feature masks. These are to be used for new v5
			
 
				  * superblock features only.
			
@@ -598,7 +595,10 @@ xfs_sb_has_ro_compat_feature(
 
				 	return (sbp->sb_features_ro_compat & feature) != 0;
			
 
				 }
			
 
				 
			
 
				-#define XFS_SB_FEAT_INCOMPAT_ALL 0
			
 
				+#define XFS_SB_FEAT_INCOMPAT_FTYPE	(1 << 0)	/* filetype in dirent */
			
 
				+#define XFS_SB_FEAT_INCOMPAT_ALL \
			
 
				+		(XFS_SB_FEAT_INCOMPAT_FTYPE)
			
 
				+
			
 
				 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
			
 
				 static inline bool
			
 
				 xfs_sb_has_incompat_feature(
			
@@ -618,16 +618,39 @@ xfs_sb_has_incompat_log_feature(
 
				 	return (sbp->sb_features_log_incompat & feature) != 0;
			
 
				 }
			
 
				 
			
 
				-static inline bool
			
 
				-xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
			
 
				+/*
			
 
				+ * V5 superblock specific feature checks
			
 
				+ */
			
 
				+static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino);
			
 
				+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
			
 
				+}
			
 
				+
			
 
				+static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp)
			
 
				+{
			
 
				+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
			
 
				+}
			
 
				+
			
 
				+static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
			
 
				+{
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
			
 
				+		xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) ||
			
 
				+	       (xfs_sb_version_hasmorebits(sbp) &&
			
 
				+		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * end of superblock version macros
			
 
				  */
			
 
				 
			
 
				+static inline bool
			
 
				+xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
			
 
				+{
			
 
				+	return (ino == sbp->sb_uquotino ||
			
 
				+		ino == sbp->sb_gquotino ||
			
 
				+		ino == sbp->sb_pquotino);
			
 
				+}
			
 
				+
			
 
				 #define XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
			
 
				 #define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
			
 
				 #define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)((bp)->b_addr))
			
@@ -660,4 +683,23 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
 
				 #define XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
			
 
				 #define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
			
 
				 
			
 
				+/*
			
 
				+ * perag get/put wrappers for ref counting
			
 
				+ */
			
 
				+extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t);
			
 
				+extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
			
 
				+					   int tag);
			
 
				+extern void	xfs_perag_put(struct xfs_perag *pag);
			
 
				+extern int	xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
			
 
				+
			
 
				+extern void	xfs_sb_calc_crc(struct xfs_buf	*);
			
 
				+extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
			
 
				+extern void	xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *);
			
 
				+extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
			
 
				+extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
			
 
				+extern void	xfs_sb_quota_from_disk(struct xfs_sb *sbp);
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_sb_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
			
 
				+
			
 
				 #endif	/* __XFS_SB_H__ */
			
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -17,12 +17,12 @@
 
				  */
			
 
				 
			
 
				 #include "xfs.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_inum.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_mount.h"
			
@@ -40,12 +40,12 @@
 
				 #include "xfs_fsops.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_buf_item.h"
			
 
				-#include "xfs_utils.h"
			
 
				-#include "xfs_vnodeops.h"
			
 
				 #include "xfs_log_priv.h"
			
 
				 #include "xfs_trans_priv.h"
			
 
				 #include "xfs_filestream.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_extfree_item.h"
			
 
				 #include "xfs_mru_cache.h"
			
 
				 #include "xfs_inode_item.h"
			
@@ -421,12 +421,6 @@ xfs_parseargs(
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				-	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
			
 
				-	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
			
 
				-		xfs_warn(mp, "cannot mount with both project and group quota");
			
 
				-		return EINVAL;
			
 
				-	}
			
 
				-
			
 
				 	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
			
 
				 		xfs_warn(mp, "sunit and swidth must be specified together");
			
 
				 		return EINVAL;
			
@@ -556,14 +550,13 @@ xfs_showargs(
 
				 	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
			
 
				 		seq_puts(m, "," MNTOPT_UQUOTANOENF);
			
 
				 
			
 
				-	/* Either project or group quotas can be active, not both */
			
 
				-
			
 
				 	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
			
 
				 		if (mp->m_qflags & XFS_PQUOTA_ENFD)
			
 
				 			seq_puts(m, "," MNTOPT_PRJQUOTA);
			
 
				 		else
			
 
				 			seq_puts(m, "," MNTOPT_PQUOTANOENF);
			
 
				-	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
			
 
				+	}
			
 
				+	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
			
 
				 		if (mp->m_qflags & XFS_GQUOTA_ENFD)
			
 
				 			seq_puts(m, "," MNTOPT_GRPQUOTA);
			
 
				 		else
			
@@ -870,17 +863,17 @@ xfs_init_mount_workqueues(
 
				 		goto out_destroy_unwritten;
			
 
				 
			
 
				 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			
 
				-			WQ_NON_REENTRANT, 0, mp->m_fsname);
			
 
				+			0, 0, mp->m_fsname);
			
 
				 	if (!mp->m_reclaim_workqueue)
			
 
				 		goto out_destroy_cil;
			
 
				 
			
 
				 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
			
 
				-			WQ_NON_REENTRANT, 0, mp->m_fsname);
			
 
				+			0, 0, mp->m_fsname);
			
 
				 	if (!mp->m_log_workqueue)
			
 
				 		goto out_destroy_reclaim;
			
 
				 
			
 
				 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
			
 
				-			WQ_NON_REENTRANT, 0, mp->m_fsname);
			
 
				+			0, 0, mp->m_fsname);
			
 
				 	if (!mp->m_eofblocks_workqueue)
			
 
				 		goto out_destroy_log;
			
 
				 
			
@@ -1396,6 +1389,14 @@ xfs_finish_flags(
 
				 		return XFS_ERROR(EROFS);
			
 
				 	}
			
 
				 
			
 
				+	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
			
 
				+	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
			
 
				+	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
			
 
				+		xfs_warn(mp,
			
 
				+		  "Super block does not support project and group quota together");
			
 
				+		return XFS_ERROR(EINVAL);
			
 
				+	}
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -18,200 +18,29 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_bit.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_dir2.h"
			
 
				 #include "xfs_mount.h"
			
 
				 #include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				-#include "xfs_inode_item.h"
			
 
				-#include "xfs_itable.h"
			
 
				 #include "xfs_ialloc.h"
			
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_bmap_util.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_quota.h"
			
 
				-#include "xfs_utils.h"
			
 
				 #include "xfs_trans_space.h"
			
 
				-#include "xfs_log_priv.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_symlink.h"
			
 
				-#include "xfs_cksum.h"
			
 
				-#include "xfs_buf_item.h"
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * Each contiguous block has a header, so it is not just a simple pathlen
			
 
				- * to FSB conversion.
			
 
				- */
			
 
				-int
			
 
				-xfs_symlink_blocks(
			
 
				-	struct xfs_mount *mp,
			
 
				-	int		pathlen)
			
 
				-{
			
 
				-	int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
			
 
				-
			
 
				-	return (pathlen + buflen - 1) / buflen;
			
 
				-}
			
 
				-
			
 
				-static int
			
 
				-xfs_symlink_hdr_set(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	xfs_ino_t		ino,
			
 
				-	uint32_t		offset,
			
 
				-	uint32_t		size,
			
 
				-	struct xfs_buf		*bp)
			
 
				-{
			
 
				-	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
			
 
				-
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return 0;
			
 
				-
			
 
				-	dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
			
 
				-	dsl->sl_offset = cpu_to_be32(offset);
			
 
				-	dsl->sl_bytes = cpu_to_be32(size);
			
 
				-	uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
			
 
				-	dsl->sl_owner = cpu_to_be64(ino);
			
 
				-	dsl->sl_blkno = cpu_to_be64(bp->b_bn);
			
 
				-	bp->b_ops = &xfs_symlink_buf_ops;
			
 
				-
			
 
				-	return sizeof(struct xfs_dsymlink_hdr);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Checking of the symlink header is split into two parts. the verifier does
			
 
				- * CRC, location and bounds checking, the unpacking function checks the path
			
 
				- * parameters and owner.
			
 
				- */
			
 
				-bool
			
 
				-xfs_symlink_hdr_ok(
			
 
				-	struct xfs_mount	*mp,
			
 
				-	xfs_ino_t		ino,
			
 
				-	uint32_t		offset,
			
 
				-	uint32_t		size,
			
 
				-	struct xfs_buf		*bp)
			
 
				-{
			
 
				-	struct xfs_dsymlink_hdr *dsl = bp->b_addr;
			
 
				-
			
 
				-	if (offset != be32_to_cpu(dsl->sl_offset))
			
 
				-		return false;
			
 
				-	if (size != be32_to_cpu(dsl->sl_bytes))
			
 
				-		return false;
			
 
				-	if (ino != be64_to_cpu(dsl->sl_owner))
			
 
				-		return false;
			
 
				-
			
 
				-	/* ok */
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static bool
			
 
				-xfs_symlink_verify(
			
 
				-	struct xfs_buf		*bp)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
			
 
				-
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return false;
			
 
				-	if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
			
 
				-		return false;
			
 
				-	if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
			
 
				-		return false;
			
 
				-	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
			
 
				-		return false;
			
 
				-	if (be32_to_cpu(dsl->sl_offset) +
			
 
				-				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
			
 
				-		return false;
			
 
				-	if (dsl->sl_owner == 0)
			
 
				-		return false;
			
 
				-
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void
			
 
				-xfs_symlink_read_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-
			
 
				-	/* no verification of non-crc buffers */
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return;
			
 
				-
			
 
				-	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				-				  offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
			
 
				-	    !xfs_symlink_verify(bp)) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void
			
 
				-xfs_symlink_write_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				-
			
 
				-	/* no verification of non-crc buffers */
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return;
			
 
				-
			
 
				-	if (!xfs_symlink_verify(bp)) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	if (bip) {
			
 
				-		struct xfs_dsymlink_hdr *dsl = bp->b_addr;
			
 
				-		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				-	}
			
 
				-	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				-			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
			
 
				-}
			
 
				-
			
 
				-const struct xfs_buf_ops xfs_symlink_buf_ops = {
			
 
				-	.verify_read = xfs_symlink_read_verify,
			
 
				-	.verify_write = xfs_symlink_write_verify,
			
 
				-};
			
 
				-
			
 
				-void
			
 
				-xfs_symlink_local_to_remote(
			
 
				-	struct xfs_trans	*tp,
			
 
				-	struct xfs_buf		*bp,
			
 
				-	struct xfs_inode	*ip,
			
 
				-	struct xfs_ifork	*ifp)
			
 
				-{
			
 
				-	struct xfs_mount	*mp = ip->i_mount;
			
 
				-	char			*buf;
			
 
				-
			
 
				-	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				-		bp->b_ops = NULL;
			
 
				-		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * As this symlink fits in an inode literal area, it must also fit in
			
 
				-	 * the smallest buffer the filesystem supports.
			
 
				-	 */
			
 
				-	ASSERT(BBTOB(bp->b_length) >=
			
 
				-			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
			
 
				-
			
 
				-	bp->b_ops = &xfs_symlink_buf_ops;
			
 
				-
			
 
				-	buf = bp->b_addr;
			
 
				-	buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
			
 
				-	memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				-}
			
 
				 
			
 
				 /* ----- Kernel only functions below ----- */
			
 
				 STATIC int
			
@@ -386,8 +215,11 @@ xfs_symlink(
 
				 	/*
			
 
				 	 * Make sure that we have allocated dquot(s) on disk.
			
 
				 	 */
			
 
				-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			
 
				-		XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp);
			
 
				+	error = xfs_qm_vop_dqalloc(dp,
			
 
				+			xfs_kuid_to_uid(current_fsuid()),
			
 
				+			xfs_kgid_to_gid(current_fsgid()), prid,
			
 
				+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
			
 
				+			&udqp, &gdqp, &pdqp);
			
 
				 	if (error)
			
 
				 		goto std_return;
			
 
				 
			
@@ -402,12 +234,10 @@ xfs_symlink(
 
				 	else
			
 
				 		fs_blocks = xfs_symlink_blocks(mp, pathlen);
			
 
				 	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
			
 
				-	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
			
 
				 	if (error == ENOSPC && fs_blocks == 0) {
			
 
				 		resblks = 0;
			
 
				-		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
			
 
				-				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
			
 
				+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
			
 
				 	}
			
 
				 	if (error) {
			
 
				 		cancel_flags = 0;
			
@@ -710,8 +540,8 @@ xfs_inactive_symlink_rmt(
 
				 	 * Put an itruncate log reservation in the new transaction
			
 
				 	 * for our caller.
			
 
				 	 */
			
 
				-	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
			
 
				+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
			
 
				+	if (error) {
			
 
				 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				 		goto error0;
			
 
				 	}
			
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -17,50 +17,11 @@
 
				 #ifndef __XFS_SYMLINK_H
			
 
				 #define __XFS_SYMLINK_H 1
			
 
				 
			
 
				-struct xfs_mount;
			
 
				-struct xfs_trans;
			
 
				-struct xfs_inode;
			
 
				-struct xfs_buf;
			
 
				-struct xfs_ifork;
			
 
				-struct xfs_name;
			
 
				-
			
 
				-#define XFS_SYMLINK_MAGIC	0x58534c4d	/* XSLM */
			
 
				-
			
 
				-struct xfs_dsymlink_hdr {
			
 
				-	__be32	sl_magic;
			
 
				-	__be32	sl_offset;
			
 
				-	__be32	sl_bytes;
			
 
				-	__be32	sl_crc;
			
 
				-	uuid_t	sl_uuid;
			
 
				-	__be64	sl_owner;
			
 
				-	__be64	sl_blkno;
			
 
				-	__be64	sl_lsn;
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * The maximum pathlen is 1024 bytes. Since the minimum file system
			
 
				- * blocksize is 512 bytes, we can get a max of 3 extents back from
			
 
				- * bmapi when crc headers are taken into account.
			
 
				- */
			
 
				-#define XFS_SYMLINK_MAPS 3
			
 
				-
			
 
				-#define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
			
 
				-	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			
 
				-			sizeof(struct xfs_dsymlink_hdr) : 0))
			
 
				-
			
 
				-int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
			
 
				-
			
 
				-void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				-				 struct xfs_inode *ip, struct xfs_ifork *ifp);
			
 
				-
			
 
				-extern const struct xfs_buf_ops xfs_symlink_buf_ops;
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				+/* Kernel only symlink definitions */
			
 
				 
			
 
				 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
			
 
				 		const char *target_path, umode_t mode, struct xfs_inode **ipp);
			
 
				 int xfs_readlink(struct xfs_inode *ip, char *link);
			
 
				 int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp);
			
 
				 
			
 
				-#endif /* __KERNEL__ */
			
 
				 #endif /* __XFS_SYMLINK_H */
			
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -0,0 +1,200 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2012-2013 Red Hat, Inc.
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_format.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_symlink.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Each contiguous block has a header, so it is not just a simple pathlen
			
 
				+ * to FSB conversion.
			
 
				+ */
			
 
				+int
			
 
				+xfs_symlink_blocks(
			
 
				+	struct xfs_mount *mp,
			
 
				+	int		pathlen)
			
 
				+{
			
 
				+	int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
			
 
				+
			
 
				+	return (pathlen + buflen - 1) / buflen;
			
 
				+}
			
 
				+
			
 
				+int
			
 
				+xfs_symlink_hdr_set(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	xfs_ino_t		ino,
			
 
				+	uint32_t		offset,
			
 
				+	uint32_t		size,
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return 0;
			
 
				+
			
 
				+	dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
			
 
				+	dsl->sl_offset = cpu_to_be32(offset);
			
 
				+	dsl->sl_bytes = cpu_to_be32(size);
			
 
				+	uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
			
 
				+	dsl->sl_owner = cpu_to_be64(ino);
			
 
				+	dsl->sl_blkno = cpu_to_be64(bp->b_bn);
			
 
				+	bp->b_ops = &xfs_symlink_buf_ops;
			
 
				+
			
 
				+	return sizeof(struct xfs_dsymlink_hdr);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Checking of the symlink header is split into two parts. The verifier does
			
 
				+ * CRC, location and bounds checking, the unpacking function checks the path
			
 
				+ * parameters and owner.
			
 
				+ */
			
 
				+bool
			
 
				+xfs_symlink_hdr_ok(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	xfs_ino_t		ino,
			
 
				+	uint32_t		offset,
			
 
				+	uint32_t		size,
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_dsymlink_hdr *dsl = bp->b_addr;
			
 
				+
			
 
				+	if (offset != be32_to_cpu(dsl->sl_offset))
			
 
				+		return false;
			
 
				+	if (size != be32_to_cpu(dsl->sl_bytes))
			
 
				+		return false;
			
 
				+	if (ino != be64_to_cpu(dsl->sl_owner))
			
 
				+		return false;
			
 
				+
			
 
				+	/* ok */
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static bool
			
 
				+xfs_symlink_verify(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return false;
			
 
				+	if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
			
 
				+		return false;
			
 
				+	if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
			
 
				+		return false;
			
 
				+	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
			
 
				+		return false;
			
 
				+	if (be32_to_cpu(dsl->sl_offset) +
			
 
				+				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
			
 
				+		return false;
			
 
				+	if (dsl->sl_owner == 0)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_symlink_read_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+	/* no verification of non-crc buffers */
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+				  offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
			
 
				+	    !xfs_symlink_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_symlink_write_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+
			
 
				+	/* no verification of non-crc buffers */
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (!xfs_symlink_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (bip) {
			
 
				+		struct xfs_dsymlink_hdr *dsl = bp->b_addr;
			
 
				+		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+	}
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
			
 
				+}
			
 
				+
			
 
				+const struct xfs_buf_ops xfs_symlink_buf_ops = {
			
 
				+	.verify_read = xfs_symlink_read_verify,
			
 
				+	.verify_write = xfs_symlink_write_verify,
			
 
				+};
			
 
				+
			
 
				+void
			
 
				+xfs_symlink_local_to_remote(
			
 
				+	struct xfs_trans	*tp,
			
 
				+	struct xfs_buf		*bp,
			
 
				+	struct xfs_inode	*ip,
			
 
				+	struct xfs_ifork	*ifp)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = ip->i_mount;
			
 
				+	char			*buf;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		bp->b_ops = NULL;
			
 
				+		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * As this symlink fits in an inode literal area, it must also fit in
			
 
				+	 * the smallest buffer the filesystem supports.
			
 
				+	 */
			
 
				+	ASSERT(BBTOB(bp->b_length) >=
			
 
				+			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
			
 
				+
			
 
				+	bp->b_ops = &xfs_symlink_buf_ops;
			
 
				+
			
 
				+	buf = bp->b_addr;
			
 
				+	buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
			
 
				+	memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
			
 
				+}
			
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -18,6 +18,7 @@
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				 #include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -18,7 +18,7 @@
 
				  */
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				-#include "xfs_types.h"
			
 
				+#include "xfs_format.h"
			
 
				 #include "xfs_log.h"
			
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
@@ -48,629 +48,6 @@
 
				 kmem_zone_t	*xfs_trans_zone;
			
 
				 kmem_zone_t	*xfs_log_item_desc_zone;
			
 
				 
			
 
				-/*
			
 
				- * A buffer has a format structure overhead in the log in addition
			
 
				- * to the data, so we need to take this into account when reserving
			
 
				- * space in a transaction for a buffer.  Round the space required up
			
 
				- * to a multiple of 128 bytes so that we don't change the historical
			
 
				- * reservation that has been used for this overhead.
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_buf_log_overhead(void)
			
 
				-{
			
 
				-	return round_up(sizeof(struct xlog_op_header) +
			
 
				-			sizeof(struct xfs_buf_log_format), 128);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Calculate out transaction log reservation per item in bytes.
			
 
				- *
			
 
				- * The nbufs argument is used to indicate the number of items that
			
 
				- * will be changed in a transaction.  size is used to tell how many
			
 
				- * bytes should be reserved per item.
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_buf_res(
			
 
				-	uint		nbufs,
			
 
				-	uint		size)
			
 
				-{
			
 
				-	return nbufs * (size + xfs_buf_log_overhead());
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Various log reservation values.
			
 
				- *
			
 
				- * These are based on the size of the file system block because that is what
			
 
				- * most transactions manipulate.  Each adds in an additional 128 bytes per
			
 
				- * item logged to try to account for the overhead of the transaction mechanism.
			
 
				- *
			
 
				- * Note:  Most of the reservations underestimate the number of allocation
			
 
				- * groups into which they could free extents in the xfs_bmap_finish() call.
			
 
				- * This is because the number in the worst case is quite high and quite
			
 
				- * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
			
 
				- * extents in only a single AG at a time.  This will require changes to the
			
 
				- * EFI code as well, however, so that the EFI for the extents not freed is
			
 
				- * logged again in each transaction.  See SGI PV #261917.
			
 
				- *
			
 
				- * Reservation functions here avoid a huge stack in xfs_trans_init due to
			
 
				- * register overflow from temporaries in the calculations.
			
 
				- */
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * In a write transaction we can allocate a maximum of 2
			
 
				- * extents.  This gives:
			
 
				- *    the inode getting the new extents: inode size
			
 
				- *    the inode's bmap btree: max depth * block size
			
 
				- *    the agfs of the ags from which the extents are allocated: 2 * sector
			
 
				- *    the superblock free block counter: sector size
			
 
				- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- * And the bmap_finish transaction can free bmap blocks in a join:
			
 
				- *    the agfs of the ags containing the blocks: 2 * sector size
			
 
				- *    the agfls of the ags containing the blocks: 2 * sector size
			
 
				- *    the super block free block counter: sector size
			
 
				- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_write_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
			
 
				-				      XFS_FSB_TO_B(mp, 1)) +
			
 
				-		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
			
 
				-				      XFS_FSB_TO_B(mp, 1))),
			
 
				-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
			
 
				-				      XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * In truncating a file we free up to two extents at once.  We can modify:
			
 
				- *    the inode being truncated: inode size
			
 
				- *    the inode's bmap btree: (max depth + 1) * block size
			
 
				- * And the bmap_finish transaction can free the blocks and bmap blocks:
			
 
				- *    the agf for each of the ags: 4 * sector size
			
 
				- *    the agfl for each of the ags: 4 * sector size
			
 
				- *    the super block to reflect the freed blocks: sector size
			
 
				- *    worst case split in allocation btrees per extent assuming 4 extents:
			
 
				- *		4 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- *    the inode btree: max depth * blocksize
			
 
				- *    the allocation btrees: 2 trees * (max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_itruncate_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
			
 
				-				      XFS_FSB_TO_B(mp, 1))),
			
 
				-		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
			
 
				-				      XFS_FSB_TO_B(mp, 1)) +
			
 
				-		    xfs_calc_buf_res(5, 0) +
			
 
				-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				     XFS_FSB_TO_B(mp, 1)) +
			
 
				-		    xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
			
 
				-				     mp->m_in_maxlevels, 0)));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * In renaming a files we can modify:
			
 
				- *    the four inodes involved: 4 * inode size
			
 
				- *    the two directory btrees: 2 * (max depth + v2) * dir block size
			
 
				- *    the two directory bmap btrees: 2 * max depth * block size
			
 
				- * And the bmap_finish transaction can free dir and bmap blocks (two sets
			
 
				- *	of bmap blocks) giving:
			
 
				- *    the agf for the ags in which the blocks live: 3 * sector size
			
 
				- *    the agfl for the ags in which the blocks live: 3 * sector size
			
 
				- *    the superblock for the free block count: sector size
			
 
				- *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_rename_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			
 
				-				      XFS_FSB_TO_B(mp, 1))),
			
 
				-		    (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
			
 
				-				      XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * For creating a link to an inode:
			
 
				- *    the parent directory inode: inode size
			
 
				- *    the linked inode: inode size
			
 
				- *    the directory btree could split: (max depth + v2) * dir block size
			
 
				- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
			
 
				- * And the bmap_finish transaction can free some bmap blocks giving:
			
 
				- *    the agf for the ag in which the blocks live: sector size
			
 
				- *    the agfl for the ag in which the blocks live: sector size
			
 
				- *    the superblock for the free block count: sector size
			
 
				- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_link_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
			
 
				-				      XFS_FSB_TO_B(mp, 1))),
			
 
				-		    (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				      XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * For removing a directory entry we can modify:
			
 
				- *    the parent directory inode: inode size
			
 
				- *    the removed inode: inode size
			
 
				- *    the directory btree could join: (max depth + v2) * dir block size
			
 
				- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
			
 
				- * And the bmap_finish transaction can free the dir and bmap blocks giving:
			
 
				- *    the agf for the ag in which the blocks live: 2 * sector size
			
 
				- *    the agfl for the ag in which the blocks live: 2 * sector size
			
 
				- *    the superblock for the free block count: sector size
			
 
				- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_remove_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
			
 
				-				      XFS_FSB_TO_B(mp, 1))),
			
 
				-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
			
 
				-				      XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * For create, break it in to the two cases that the transaction
			
 
				- * covers. We start with the modify case - allocation done by modification
			
 
				- * of the state of existing inodes - and the allocation case.
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * For create we can modify:
			
 
				- *    the parent directory inode: inode size
			
 
				- *    the new inode: inode size
			
 
				- *    the inode btree entry: block size
			
 
				- *    the superblock for the nlink flag: sector size
			
 
				- *    the directory btree: (max depth + v2) * dir block size
			
 
				- *    the directory inode's bmap btree: (max depth + v2) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_create_resv_modify(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
			
 
				-		(uint)XFS_FSB_TO_B(mp, 1) +
			
 
				-		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * For create we can allocate some inodes giving:
			
 
				- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
			
 
				- *    the superblock for the nlink flag: sector size
			
 
				- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
			
 
				- *    the inode btree: max depth * blocksize
			
 
				- *    the allocation btrees: 2 trees * (max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_create_resv_alloc(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
			
 
				-		mp->m_sb.sb_sectsize +
			
 
				-		xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
			
 
				-		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-STATIC uint
			
 
				-__xfs_calc_create_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX(xfs_calc_create_resv_alloc(mp),
			
 
				-		    xfs_calc_create_resv_modify(mp));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * For icreate we can allocate some inodes giving:
			
 
				- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
			
 
				- *    the superblock for the nlink flag: sector size
			
 
				- *    the inode btree: max depth * blocksize
			
 
				- *    the allocation btrees: 2 trees * (max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_icreate_resv_alloc(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
			
 
				-		mp->m_sb.sb_sectsize +
			
 
				-		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-STATIC uint
			
 
				-xfs_calc_icreate_reservation(xfs_mount_t *mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX(xfs_calc_icreate_resv_alloc(mp),
			
 
				-		    xfs_calc_create_resv_modify(mp));
			
 
				-}
			
 
				-
			
 
				-STATIC uint
			
 
				-xfs_calc_create_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				-		return xfs_calc_icreate_reservation(mp);
			
 
				-	return __xfs_calc_create_reservation(mp);
			
 
				-
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Making a new directory is the same as creating a new file.
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_mkdir_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_create_reservation(mp);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * Making a new symplink is the same as creating a new file, but
			
 
				- * with the added blocks for remote symlink data which can be up to 1kB in
			
 
				- * length (MAXPATHLEN).
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_symlink_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_create_reservation(mp) +
			
 
				-	       xfs_calc_buf_res(1, MAXPATHLEN);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * In freeing an inode we can modify:
			
 
				- *    the inode being freed: inode size
			
 
				- *    the super block free inode counter: sector size
			
 
				- *    the agi hash list and counters: sector size
			
 
				- *    the inode btree entry: block size
			
 
				- *    the on disk inode before ours in the agi hash list: inode cluster size
			
 
				- *    the inode btree: max depth * blocksize
			
 
				- *    the allocation btrees: 2 trees * (max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_ifree_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
			
 
				-		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
			
 
				-		    XFS_INODE_CLUSTER_SIZE(mp)) +
			
 
				-		xfs_calc_buf_res(1, 0) +
			
 
				-		xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
			
 
				-				 mp->m_in_maxlevels, 0) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * When only changing the inode we log the inode and possibly the superblock
			
 
				- * We also add a bit of slop for the transaction stuff.
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_ichange_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		mp->m_sb.sb_inodesize +
			
 
				-		mp->m_sb.sb_sectsize +
			
 
				-		512;
			
 
				-
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Growing the data section of the filesystem.
			
 
				- *	superblock
			
 
				- *	agi and agf
			
 
				- *	allocation btrees
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_growdata_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Growing the rt section of the filesystem.
			
 
				- * In the first set of transactions (ALLOC) we allocate space to the
			
 
				- * bitmap or summary files.
			
 
				- *	superblock: sector size
			
 
				- *	agf of the ag from which the extent is allocated: sector size
			
 
				- *	bmap btree for bitmap/summary inode: max depth * blocksize
			
 
				- *	bitmap/summary inode: inode size
			
 
				- *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_growrtalloc_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
			
 
				-				 XFS_FSB_TO_B(mp, 1)) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Growing the rt section of the filesystem.
			
 
				- * In the second set of transactions (ZERO) we zero the new metadata blocks.
			
 
				- *	one bitmap/summary block: blocksize
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_growrtzero_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Growing the rt section of the filesystem.
			
 
				- * In the third set of transactions (FREE) we update metadata without
			
 
				- * allocating any new blocks.
			
 
				- *	superblock: sector size
			
 
				- *	bitmap inode: inode size
			
 
				- *	summary inode: inode size
			
 
				- *	one bitmap block: blocksize
			
 
				- *	summary blocks: new summary size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_growrtfree_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
			
 
				-		xfs_calc_buf_res(1, mp->m_rsumsize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Logging the inode modification timestamp on a synchronous write.
			
 
				- *	inode
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_swrite_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Logging the inode mode bits when writing a setuid/setgid file
			
 
				- *	inode
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_writeid_reservation(xfs_mount_t *mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Converting the inode from non-attributed to attributed.
			
 
				- *	the inode being converted: inode size
			
 
				- *	agf block and superblock (for block allocation)
			
 
				- *	the new block (directory sized)
			
 
				- *	bmap blocks for the new directory block
			
 
				- *	allocation btrees
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_addafork_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(1, mp->m_dirblksize) +
			
 
				-		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
			
 
				-				 XFS_FSB_TO_B(mp, 1)) +
			
 
				-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Removing the attribute fork of a file
			
 
				- *    the inode being truncated: inode size
			
 
				- *    the inode's bmap btree: max depth * block size
			
 
				- * And the bmap_finish transaction can free the blocks and bmap blocks:
			
 
				- *    the agf for each of the ags: 4 * sector size
			
 
				- *    the agfl for each of the ags: 4 * sector size
			
 
				- *    the super block to reflect the freed blocks: sector size
			
 
				- *    worst case split in allocation btrees per extent assuming 4 extents:
			
 
				- *		4 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_attrinval_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
			
 
				-				     XFS_FSB_TO_B(mp, 1))),
			
 
				-		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
			
 
				-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
			
 
				-				     XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Setting an attribute at mount time.
			
 
				- *	the inode getting the attribute
			
 
				- *	the superblock for allocations
			
 
				- *	the agfs extents are allocated from
			
 
				- *	the attribute btree * max depth
			
 
				- *	the inode allocation btree
			
 
				- * Since attribute transaction space is dependent on the size of the attribute,
			
 
				- * the calculation is done partially at mount time and partially at runtime(see
			
 
				- * below).
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_attrsetm_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Setting an attribute at runtime, transaction space unit per block.
			
 
				- * 	the superblock for allocations: sector size
			
 
				- *	the inode bmap btree could join or split: max depth * block size
			
 
				- * Since the runtime attribute transaction space is dependent on the total
			
 
				- * blocks needed for the 1st bmap, here we calculate out the space unit for
			
 
				- * one block so that the caller could figure out the total space according
			
 
				- * to the attibute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_attrsetrt_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
			
 
				-		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
			
 
				-				 XFS_FSB_TO_B(mp, 1));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Removing an attribute.
			
 
				- *    the inode: inode size
			
 
				- *    the attribute btree could join: max depth * block size
			
 
				- *    the inode bmap btree could join or split: max depth * block size
			
 
				- * And the bmap_finish transaction can free the attr blocks freed giving:
			
 
				- *    the agf for the ag in which the blocks live: 2 * sector size
			
 
				- *    the agfl for the ag in which the blocks live: 2 * sector size
			
 
				- *    the superblock for the free block count: sector size
			
 
				- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_attrrm_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_DQUOT_LOGRES(mp) +
			
 
				-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
			
 
				-		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
			
 
				-				      XFS_FSB_TO_B(mp, 1)) +
			
 
				-		     (uint)XFS_FSB_TO_B(mp,
			
 
				-					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
			
 
				-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
			
 
				-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
			
 
				-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
			
 
				-				      XFS_FSB_TO_B(mp, 1))));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Clearing a bad agino number in an agi hash bucket.
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_clear_agi_bucket_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Clearing the quotaflags in the superblock.
			
 
				- *	the super block for changing quota flags: sector size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_qm_sbchange_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Adjusting quota limits.
			
 
				- *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_qm_setqlim_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Allocating quota on disk if needed.
			
 
				- *	the write transaction log space: XFS_WRITE_LOG_RES(mp)
			
 
				- *	the unit of quota allocation: one system block size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_qm_dqalloc_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return XFS_WRITE_LOG_RES(mp) +
			
 
				-		xfs_calc_buf_res(1,
			
 
				-			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Turning off quotas.
			
 
				- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
			
 
				- *    the superblock for the quota flags: sector size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_qm_quotaoff_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return sizeof(struct xfs_qoff_logitem) * 2 +
			
 
				-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * End of turning off quotas.
			
 
				- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_qm_quotaoff_end_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return sizeof(struct xfs_qoff_logitem) * 2;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Syncing the incore super block changes to disk.
			
 
				- *     the super block to reflect the changes: sector size
			
 
				- */
			
 
				-STATIC uint
			
 
				-xfs_calc_sb_reservation(
			
 
				-	struct xfs_mount	*mp)
			
 
				-{
			
 
				-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Initialize the precomputed transaction reservation values
			
 
				  * in the mount structure.
			
@@ -679,36 +56,7 @@ void
 
				 xfs_trans_init(
			
 
				 	struct xfs_mount	*mp)
			
 
				 {
			
 
				-	struct xfs_trans_reservations *resp = &mp->m_reservations;
			
 
				-
			
 
				-	resp->tr_write = xfs_calc_write_reservation(mp);
			
 
				-	resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
			
 
				-	resp->tr_rename = xfs_calc_rename_reservation(mp);
			
 
				-	resp->tr_link = xfs_calc_link_reservation(mp);
			
 
				-	resp->tr_remove = xfs_calc_remove_reservation(mp);
			
 
				-	resp->tr_symlink = xfs_calc_symlink_reservation(mp);
			
 
				-	resp->tr_create = xfs_calc_create_reservation(mp);
			
 
				-	resp->tr_mkdir = xfs_calc_mkdir_reservation(mp);
			
 
				-	resp->tr_ifree = xfs_calc_ifree_reservation(mp);
			
 
				-	resp->tr_ichange = xfs_calc_ichange_reservation(mp);
			
 
				-	resp->tr_growdata = xfs_calc_growdata_reservation(mp);
			
 
				-	resp->tr_swrite = xfs_calc_swrite_reservation(mp);
			
 
				-	resp->tr_writeid = xfs_calc_writeid_reservation(mp);
			
 
				-	resp->tr_addafork = xfs_calc_addafork_reservation(mp);
			
 
				-	resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
			
 
				-	resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
			
 
				-	resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
			
 
				-	resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
			
 
				-	resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
			
 
				-	resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
			
 
				-	resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
			
 
				-	resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
			
 
				-	resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
			
 
				-	resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
			
 
				-	resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
			
 
				-	resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
			
 
				-	resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
			
 
				-	resp->tr_sb = xfs_calc_sb_reservation(mp);
			
 
				+	xfs_trans_resv_calc(mp, M_RES(mp));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -744,7 +92,7 @@ _xfs_trans_alloc(
 
				 	atomic_inc(&mp->m_active_trans);
			
 
				 
			
 
				 	tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
			
 
				-	tp->t_magic = XFS_TRANS_MAGIC;
			
 
				+	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
			
 
				 	tp->t_type = type;
			
 
				 	tp->t_mountp = mp;
			
 
				 	INIT_LIST_HEAD(&tp->t_items);
			
@@ -789,7 +137,7 @@ xfs_trans_dup(
 
				 	/*
			
 
				 	 * Initialize the new transaction structure.
			
 
				 	 */
			
 
				-	ntp->t_magic = XFS_TRANS_MAGIC;
			
 
				+	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
			
 
				 	ntp->t_type = tp->t_type;
			
 
				 	ntp->t_mountp = tp->t_mountp;
			
 
				 	INIT_LIST_HEAD(&ntp->t_items);
			
@@ -832,12 +180,10 @@ xfs_trans_dup(
 
				  */
			
 
				 int
			
 
				 xfs_trans_reserve(
			
 
				-	xfs_trans_t	*tp,
			
 
				-	uint		blocks,
			
 
				-	uint		logspace,
			
 
				-	uint		rtextents,
			
 
				-	uint		flags,
			
 
				-	uint		logcount)
			
 
				+	struct xfs_trans	*tp,
			
 
				+	struct xfs_trans_res	*resp,
			
 
				+	uint			blocks,
			
 
				+	uint			rtextents)
			
 
				 {
			
 
				 	int		error = 0;
			
 
				 	int		rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
			
@@ -863,13 +209,15 @@ xfs_trans_reserve(
 
				 	/*
			
 
				 	 * Reserve the log space needed for this transaction.
			
 
				 	 */
			
 
				-	if (logspace > 0) {
			
 
				+	if (resp->tr_logres > 0) {
			
 
				 		bool	permanent = false;
			
 
				 
			
 
				-		ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
			
 
				-		ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
			
 
				+		ASSERT(tp->t_log_res == 0 ||
			
 
				+		       tp->t_log_res == resp->tr_logres);
			
 
				+		ASSERT(tp->t_log_count == 0 ||
			
 
				+		       tp->t_log_count == resp->tr_logcount);
			
 
				 
			
 
				-		if (flags & XFS_TRANS_PERM_LOG_RES) {
			
 
				+		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
			
 
				 			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
			
 
				 			permanent = true;
			
 
				 		} else {
			
@@ -878,20 +226,21 @@ xfs_trans_reserve(
 
				 		}
			
 
				 
			
 
				 		if (tp->t_ticket != NULL) {
			
 
				-			ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
			
 
				+			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
			
 
				 			error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
			
 
				 		} else {
			
 
				-			error = xfs_log_reserve(tp->t_mountp, logspace,
			
 
				-						logcount, &tp->t_ticket,
			
 
				-						XFS_TRANSACTION, permanent,
			
 
				-						tp->t_type);
			
 
				+			error = xfs_log_reserve(tp->t_mountp,
			
 
				+						resp->tr_logres,
			
 
				+						resp->tr_logcount,
			
 
				+						&tp->t_ticket, XFS_TRANSACTION,
			
 
				+						permanent, tp->t_type);
			
 
				 		}
			
 
				 
			
 
				 		if (error)
			
 
				 			goto undo_blocks;
			
 
				 
			
 
				-		tp->t_log_res = logspace;
			
 
				-		tp->t_log_count = logcount;
			
 
				+		tp->t_log_res = resp->tr_logres;
			
 
				+		tp->t_log_count = resp->tr_logcount;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -916,10 +265,10 @@ xfs_trans_reserve(
 
				 	 * reservations which have already been performed.
			
 
				 	 */
			
 
				 undo_log:
			
 
				-	if (logspace > 0) {
			
 
				+	if (resp->tr_logres > 0) {
			
 
				 		int		log_flags;
			
 
				 
			
 
				-		if (flags & XFS_TRANS_PERM_LOG_RES) {
			
 
				+		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
			
 
				 			log_flags = XFS_LOG_REL_PERM_RESERV;
			
 
				 		} else {
			
 
				 			log_flags = 0;
			
@@ -1367,10 +716,10 @@ xfs_trans_free_items(
 
				 		lip->li_desc = NULL;
			
 
				 
			
 
				 		if (commit_lsn != NULLCOMMITLSN)
			
 
				-			IOP_COMMITTING(lip, commit_lsn);
			
 
				+			lip->li_ops->iop_committing(lip, commit_lsn);
			
 
				 		if (flags & XFS_TRANS_ABORT)
			
 
				 			lip->li_flags |= XFS_LI_ABORTED;
			
 
				-		IOP_UNLOCK(lip);
			
 
				+		lip->li_ops->iop_unlock(lip);
			
 
				 
			
 
				 		xfs_trans_free_item_desc(lidp);
			
 
				 	}
			
@@ -1390,8 +739,11 @@ xfs_log_item_batch_insert(
 
				 	/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
			
 
				 	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
			
 
				 
			
 
				-	for (i = 0; i < nr_items; i++)
			
 
				-		IOP_UNPIN(log_items[i], 0);
			
 
				+	for (i = 0; i < nr_items; i++) {
			
 
				+		struct xfs_log_item *lip = log_items[i];
			
 
				+
			
 
				+		lip->li_ops->iop_unpin(lip, 0);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1401,11 +753,11 @@ xfs_log_item_batch_insert(
 
				  *
			
 
				  * If we are called with the aborted flag set, it is because a log write during
			
 
				  * a CIL checkpoint commit has failed. In this case, all the items in the
			
 
				- * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which
			
 
				+ * checkpoint have already gone through iop_commited and iop_unlock, which
			
 
				  * means that checkpoint commit abort handling is treated exactly the same
			
 
				  * as an iclog write error even though we haven't started any IO yet. Hence in
			
 
				- * this case all we need to do is IOP_COMMITTED processing, followed by an
			
 
				- * IOP_UNPIN(aborted) call.
			
 
				+ * this case all we need to do is iop_committed processing, followed by an
			
 
				+ * iop_unpin(aborted) call.
			
 
				  *
			
 
				  * The AIL cursor is used to optimise the insert process. If commit_lsn is not
			
 
				  * at the end of the AIL, the insert cursor avoids the need to walk
			
@@ -1438,7 +790,7 @@ xfs_trans_committed_bulk(
 
				 
			
 
				 		if (aborted)
			
 
				 			lip->li_flags |= XFS_LI_ABORTED;
			
 
				-		item_lsn = IOP_COMMITTED(lip, commit_lsn);
			
 
				+		item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
			
 
				 
			
 
				 		/* item_lsn of -1 means the item needs no further processing */
			
 
				 		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
			
@@ -1450,7 +802,7 @@ xfs_trans_committed_bulk(
 
				 		 */
			
 
				 		if (aborted) {
			
 
				 			ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
			
 
				-			IOP_UNPIN(lip, 1);
			
 
				+			lip->li_ops->iop_unpin(lip, 1);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
@@ -1468,7 +820,7 @@ xfs_trans_committed_bulk(
 
				 				xfs_trans_ail_update(ailp, lip, item_lsn);
			
 
				 			else
			
 
				 				spin_unlock(&ailp->xa_lock);
			
 
				-			IOP_UNPIN(lip, 0);
			
 
				+			lip->li_ops->iop_unpin(lip, 0);
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
@@ -1666,7 +1018,7 @@ xfs_trans_roll(
 
				 	struct xfs_inode	*dp)
			
 
				 {
			
 
				 	struct xfs_trans	*trans;
			
 
				-	unsigned int		logres, count;
			
 
				+	struct xfs_trans_res	tres;
			
 
				 	int			error;
			
 
				 
			
 
				 	/*
			
@@ -1678,8 +1030,8 @@ xfs_trans_roll(
 
				 	/*
			
 
				 	 * Copy the critical parameters from one trans to the next.
			
 
				 	 */
			
 
				-	logres = trans->t_log_res;
			
 
				-	count = trans->t_log_count;
			
 
				+	tres.tr_logres = trans->t_log_res;
			
 
				+	tres.tr_logcount = trans->t_log_count;
			
 
				 	*tpp = xfs_trans_dup(trans);
			
 
				 
			
 
				 	/*
			
@@ -1710,8 +1062,8 @@ xfs_trans_roll(
 
				 	 * across this call, or that anything that is locked be logged in
			
 
				 	 * the prior and the next transactions.
			
 
				 	 */
			
 
				-	error = xfs_trans_reserve(trans, 0, logres, 0,
			
 
				-				  XFS_TRANS_PERM_LOG_RES, count);
			
 
				+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
			
 
				+	error = xfs_trans_reserve(trans, &tres, 0, 0);
			
 
				 	/*
			
 
				 	 *  Ensure that the inode is in the new transaction and locked.
			
 
				 	 */
			
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -20,285 +20,9 @@
 
				 
			
 
				 struct xfs_log_item;
			
 
				 
			
 
				-/*
			
 
				- * This is the structure written in the log at the head of
			
 
				- * every transaction. It identifies the type and id of the
			
 
				- * transaction, and contains the number of items logged by
			
 
				- * the transaction so we know how many to expect during recovery.
			
 
				- *
			
 
				- * Do not change the below structure without redoing the code in
			
 
				- * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
			
 
				- */
			
 
				-typedef struct xfs_trans_header {
			
 
				-	uint		th_magic;		/* magic number */
			
 
				-	uint		th_type;		/* transaction type */
			
 
				-	__int32_t	th_tid;			/* transaction id (unused) */
			
 
				-	uint		th_num_items;		/* num items logged by trans */
			
 
				-} xfs_trans_header_t;
			
 
				-
			
 
				-#define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* TRAN */
			
 
				-
			
 
				-/*
			
 
				- * Log item types.
			
 
				- */
			
 
				-#define	XFS_LI_EFI		0x1236
			
 
				-#define	XFS_LI_EFD		0x1237
			
 
				-#define	XFS_LI_IUNLINK		0x1238
			
 
				-#define	XFS_LI_INODE		0x123b	/* aligned ino chunks, var-size ibufs */
			
 
				-#define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
			
 
				-#define	XFS_LI_DQUOT		0x123d
			
 
				-#define	XFS_LI_QUOTAOFF		0x123e
			
 
				-#define	XFS_LI_ICREATE		0x123f
			
 
				-
			
 
				-#define XFS_LI_TYPE_DESC \
			
 
				-	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
			
 
				-	{ XFS_LI_EFD,		"XFS_LI_EFD" }, \
			
 
				-	{ XFS_LI_IUNLINK,	"XFS_LI_IUNLINK" }, \
			
 
				-	{ XFS_LI_INODE,		"XFS_LI_INODE" }, \
			
 
				-	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
			
 
				-	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
			
 
				-	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }
			
 
				-
			
 
				-/*
			
 
				- * Transaction types.  Used to distinguish types of buffers.
			
 
				- */
			
 
				-#define XFS_TRANS_SETATTR_NOT_SIZE	1
			
 
				-#define XFS_TRANS_SETATTR_SIZE		2
			
 
				-#define XFS_TRANS_INACTIVE		3
			
 
				-#define XFS_TRANS_CREATE		4
			
 
				-#define XFS_TRANS_CREATE_TRUNC		5
			
 
				-#define XFS_TRANS_TRUNCATE_FILE		6
			
 
				-#define XFS_TRANS_REMOVE		7
			
 
				-#define XFS_TRANS_LINK			8
			
 
				-#define XFS_TRANS_RENAME		9
			
 
				-#define XFS_TRANS_MKDIR			10
			
 
				-#define XFS_TRANS_RMDIR			11
			
 
				-#define XFS_TRANS_SYMLINK		12
			
 
				-#define XFS_TRANS_SET_DMATTRS		13
			
 
				-#define XFS_TRANS_GROWFS		14
			
 
				-#define XFS_TRANS_STRAT_WRITE		15
			
 
				-#define XFS_TRANS_DIOSTRAT		16
			
 
				-/* 17 was XFS_TRANS_WRITE_SYNC */
			
 
				-#define	XFS_TRANS_WRITEID		18
			
 
				-#define	XFS_TRANS_ADDAFORK		19
			
 
				-#define	XFS_TRANS_ATTRINVAL		20
			
 
				-#define	XFS_TRANS_ATRUNCATE		21
			
 
				-#define	XFS_TRANS_ATTR_SET		22
			
 
				-#define	XFS_TRANS_ATTR_RM		23
			
 
				-#define	XFS_TRANS_ATTR_FLAG		24
			
 
				-#define	XFS_TRANS_CLEAR_AGI_BUCKET	25
			
 
				-#define XFS_TRANS_QM_SBCHANGE		26
			
 
				-/*
			
 
				- * Dummy entries since we use the transaction type to index into the
			
 
				- * trans_type[] in xlog_recover_print_trans_head()
			
 
				- */
			
 
				-#define XFS_TRANS_DUMMY1		27
			
 
				-#define XFS_TRANS_DUMMY2		28
			
 
				-#define XFS_TRANS_QM_QUOTAOFF		29
			
 
				-#define XFS_TRANS_QM_DQALLOC		30
			
 
				-#define XFS_TRANS_QM_SETQLIM		31
			
 
				-#define XFS_TRANS_QM_DQCLUSTER		32
			
 
				-#define XFS_TRANS_QM_QINOCREATE		33
			
 
				-#define XFS_TRANS_QM_QUOTAOFF_END	34
			
 
				-#define XFS_TRANS_SB_UNIT		35
			
 
				-#define XFS_TRANS_FSYNC_TS		36
			
 
				-#define	XFS_TRANS_GROWFSRT_ALLOC	37
			
 
				-#define	XFS_TRANS_GROWFSRT_ZERO		38
			
 
				-#define	XFS_TRANS_GROWFSRT_FREE		39
			
 
				-#define	XFS_TRANS_SWAPEXT		40
			
 
				-#define	XFS_TRANS_SB_COUNT		41
			
 
				-#define	XFS_TRANS_CHECKPOINT		42
			
 
				-#define	XFS_TRANS_ICREATE		43
			
 
				-#define	XFS_TRANS_TYPE_MAX		43
			
 
				-/* new transaction types need to be reflected in xfs_logprint(8) */
			
 
				-
			
 
				-#define XFS_TRANS_TYPES \
			
 
				-	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \
			
 
				-	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \
			
 
				-	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \
			
 
				-	{ XFS_TRANS_CREATE,		"CREATE" }, \
			
 
				-	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \
			
 
				-	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \
			
 
				-	{ XFS_TRANS_REMOVE,		"REMOVE" }, \
			
 
				-	{ XFS_TRANS_LINK,		"LINK" }, \
			
 
				-	{ XFS_TRANS_RENAME,		"RENAME" }, \
			
 
				-	{ XFS_TRANS_MKDIR,		"MKDIR" }, \
			
 
				-	{ XFS_TRANS_RMDIR,		"RMDIR" }, \
			
 
				-	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \
			
 
				-	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \
			
 
				-	{ XFS_TRANS_GROWFS,		"GROWFS" }, \
			
 
				-	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \
			
 
				-	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \
			
 
				-	{ XFS_TRANS_WRITEID,		"WRITEID" }, \
			
 
				-	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \
			
 
				-	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \
			
 
				-	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \
			
 
				-	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \
			
 
				-	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \
			
 
				-	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \
			
 
				-	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \
			
 
				-	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \
			
 
				-	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \
			
 
				-	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \
			
 
				-	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \
			
 
				-	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \
			
 
				-	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \
			
 
				-	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \
			
 
				-	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \
			
 
				-	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \
			
 
				-	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \
			
 
				-	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \
			
 
				-	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \
			
 
				-	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \
			
 
				-	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \
			
 
				-	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \
			
 
				-	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \
			
 
				-	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \
			
 
				-	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" }
			
 
				-
			
 
				-/*
			
 
				- * This structure is used to track log items associated with
			
 
				- * a transaction.  It points to the log item and keeps some
			
 
				- * flags to track the state of the log item.  It also tracks
			
 
				- * the amount of space needed to log the item it describes
			
 
				- * once we get to commit processing (see xfs_trans_commit()).
			
 
				- */
			
 
				-struct xfs_log_item_desc {
			
 
				-	struct xfs_log_item	*lid_item;
			
 
				-	struct list_head	lid_trans;
			
 
				-	unsigned char		lid_flags;
			
 
				-};
			
 
				-
			
 
				-#define XFS_LID_DIRTY		0x1
			
 
				-
			
 
				-#define	XFS_TRANS_MAGIC		0x5452414E	/* 'TRAN' */
			
 
				-/*
			
 
				- * Values for t_flags.
			
 
				- */
			
 
				-#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */
			
 
				-#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
			
 
				-#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
			
 
				-#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
			
 
				-#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
			
 
				-#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
			
 
				-#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer
			
 
				-					   count in superblock */
			
 
				-
			
 
				-/*
			
 
				- * Values for call flags parameter.
			
 
				- */
			
 
				-#define	XFS_TRANS_RELEASE_LOG_RES	0x4
			
 
				-#define	XFS_TRANS_ABORT			0x8
			
 
				-
			
 
				-/*
			
 
				- * Field values for xfs_trans_mod_sb.
			
 
				- */
			
 
				-#define	XFS_TRANS_SB_ICOUNT		0x00000001
			
 
				-#define	XFS_TRANS_SB_IFREE		0x00000002
			
 
				-#define	XFS_TRANS_SB_FDBLOCKS		0x00000004
			
 
				-#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008
			
 
				-#define	XFS_TRANS_SB_FREXTENTS		0x00000010
			
 
				-#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020
			
 
				-#define	XFS_TRANS_SB_DBLOCKS		0x00000040
			
 
				-#define	XFS_TRANS_SB_AGCOUNT		0x00000080
			
 
				-#define	XFS_TRANS_SB_IMAXPCT		0x00000100
			
 
				-#define	XFS_TRANS_SB_REXTSIZE		0x00000200
			
 
				-#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400
			
 
				-#define	XFS_TRANS_SB_RBLOCKS		0x00000800
			
 
				-#define	XFS_TRANS_SB_REXTENTS		0x00001000
			
 
				-#define	XFS_TRANS_SB_REXTSLOG		0x00002000
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- * Per-extent log reservation for the allocation btree changes
			
 
				- * involved in freeing or allocating an extent.
			
 
				- * 2 trees * (2 blocks/level * max depth - 1)
			
 
				- */
			
 
				-#define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
			
 
				-	((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
			
 
				-
			
 
				-/*
			
 
				- * Per-directory log reservation for any directory change.
			
 
				- * dir blocks: (1 btree block per level + data block + free block)
			
 
				- * bmap btree: (levels + 2) * max depth
			
 
				- * v2 directory blocks can be fragmented below the dirblksize down to the fsb
			
 
				- * size, so account for that in the DAENTER macros.
			
 
				- */
			
 
				-#define	XFS_DIROP_LOG_COUNT(mp)	\
			
 
				-	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
			
 
				-	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
			
 
				-
			
 
				+#include "xfs_trans_resv.h"
			
 
				 
			
 
				-#define	XFS_WRITE_LOG_RES(mp)	((mp)->m_reservations.tr_write)
			
 
				-#define	XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
			
 
				-#define	XFS_RENAME_LOG_RES(mp)	((mp)->m_reservations.tr_rename)
			
 
				-#define	XFS_LINK_LOG_RES(mp)	((mp)->m_reservations.tr_link)
			
 
				-#define	XFS_REMOVE_LOG_RES(mp)	((mp)->m_reservations.tr_remove)
			
 
				-#define	XFS_SYMLINK_LOG_RES(mp)	((mp)->m_reservations.tr_symlink)
			
 
				-#define	XFS_CREATE_LOG_RES(mp)	((mp)->m_reservations.tr_create)
			
 
				-#define	XFS_MKDIR_LOG_RES(mp)	((mp)->m_reservations.tr_mkdir)
			
 
				-#define	XFS_IFREE_LOG_RES(mp)	((mp)->m_reservations.tr_ifree)
			
 
				-#define	XFS_ICHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_ichange)
			
 
				-#define	XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
			
 
				-#define	XFS_GROWRTALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_growrtalloc)
			
 
				-#define	XFS_GROWRTZERO_LOG_RES(mp)	((mp)->m_reservations.tr_growrtzero)
			
 
				-#define	XFS_GROWRTFREE_LOG_RES(mp)	((mp)->m_reservations.tr_growrtfree)
			
 
				-#define	XFS_SWRITE_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
			
 
				-/*
			
 
				- * Logging the inode timestamps on an fsync -- same as SWRITE
			
 
				- * as long as SWRITE logs the entire inode core
			
 
				- */
			
 
				-#define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
			
 
				-#define	XFS_WRITEID_LOG_RES(mp)		((mp)->m_reservations.tr_swrite)
			
 
				-#define	XFS_ADDAFORK_LOG_RES(mp)	((mp)->m_reservations.tr_addafork)
			
 
				-#define	XFS_ATTRINVAL_LOG_RES(mp)	((mp)->m_reservations.tr_attrinval)
			
 
				-#define	XFS_ATTRSETM_LOG_RES(mp)	((mp)->m_reservations.tr_attrsetm)
			
 
				-#define XFS_ATTRSETRT_LOG_RES(mp)	((mp)->m_reservations.tr_attrsetrt)
			
 
				-#define	XFS_ATTRRM_LOG_RES(mp)		((mp)->m_reservations.tr_attrrm)
			
 
				-#define	XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
			
 
				-#define XFS_QM_SBCHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_qm_sbchange)
			
 
				-#define XFS_QM_SETQLIM_LOG_RES(mp)	((mp)->m_reservations.tr_qm_setqlim)
			
 
				-#define XFS_QM_DQALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_qm_dqalloc)
			
 
				-#define XFS_QM_QUOTAOFF_LOG_RES(mp)	((mp)->m_reservations.tr_qm_quotaoff)
			
 
				-#define XFS_QM_QUOTAOFF_END_LOG_RES(mp)	((mp)->m_reservations.tr_qm_equotaoff)
			
 
				-#define XFS_SB_LOG_RES(mp)		((mp)->m_reservations.tr_sb)
			
 
				-
			
 
				-/*
			
 
				- * Various log count values.
			
 
				- */
			
 
				-#define	XFS_DEFAULT_LOG_COUNT		1
			
 
				-#define	XFS_DEFAULT_PERM_LOG_COUNT	2
			
 
				-#define	XFS_ITRUNCATE_LOG_COUNT		2
			
 
				-#define XFS_INACTIVE_LOG_COUNT		2
			
 
				-#define	XFS_CREATE_LOG_COUNT		2
			
 
				-#define	XFS_MKDIR_LOG_COUNT		3
			
 
				-#define	XFS_SYMLINK_LOG_COUNT		3
			
 
				-#define	XFS_REMOVE_LOG_COUNT		2
			
 
				-#define	XFS_LINK_LOG_COUNT		2
			
 
				-#define	XFS_RENAME_LOG_COUNT		2
			
 
				-#define	XFS_WRITE_LOG_COUNT		2
			
 
				-#define	XFS_ADDAFORK_LOG_COUNT		2
			
 
				-#define	XFS_ATTRINVAL_LOG_COUNT		1
			
 
				-#define	XFS_ATTRSET_LOG_COUNT		3
			
 
				-#define	XFS_ATTRRM_LOG_COUNT		3
			
 
				-
			
 
				-/*
			
 
				- * Here we centralize the specification of XFS meta-data buffer
			
 
				- * reference count values.  This determine how hard the buffer
			
 
				- * cache tries to hold onto the buffer.
			
 
				- */
			
 
				-#define	XFS_AGF_REF		4
			
 
				-#define	XFS_AGI_REF		4
			
 
				-#define	XFS_AGFL_REF		3
			
 
				-#define	XFS_INO_BTREE_REF	3
			
 
				-#define	XFS_ALLOC_BTREE_REF	2
			
 
				-#define	XFS_BMAP_BTREE_REF	2
			
 
				-#define	XFS_DIR_BTREE_REF	2
			
 
				-#define	XFS_INO_REF		2
			
 
				-#define	XFS_ATTR_BTREE_REF	1
			
 
				-#define	XFS_DQUOT_REF		1
			
 
				-
			
 
				-#ifdef __KERNEL__
			
 
				+/* kernel only transaction subsystem defines */
			
 
				 
			
 
				 struct xfs_buf;
			
 
				 struct xfs_buftarg;
			
@@ -310,6 +34,7 @@ struct xfs_log_iovec;
 
				 struct xfs_log_item_desc;
			
 
				 struct xfs_mount;
			
 
				 struct xfs_trans;
			
 
				+struct xfs_trans_res;
			
 
				 struct xfs_dquot_acct;
			
 
				 struct xfs_busy_extent;
			
 
				 
			
@@ -342,7 +67,7 @@ typedef struct xfs_log_item {
 
				 	{ XFS_LI_ABORTED,	"ABORTED" }
			
 
				 
			
 
				 struct xfs_item_ops {
			
 
				-	uint (*iop_size)(xfs_log_item_t *);
			
 
				+	void (*iop_size)(xfs_log_item_t *, int *, int *);
			
 
				 	void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
			
 
				 	void (*iop_pin)(xfs_log_item_t *);
			
 
				 	void (*iop_unpin)(xfs_log_item_t *, int remove);
			
@@ -352,17 +77,8 @@ struct xfs_item_ops {
 
				 	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
			
 
				 };
			
 
				 
			
 
				-#define IOP_SIZE(ip)		(*(ip)->li_ops->iop_size)(ip)
			
 
				-#define IOP_FORMAT(ip,vp)	(*(ip)->li_ops->iop_format)(ip, vp)
			
 
				-#define IOP_PIN(ip)		(*(ip)->li_ops->iop_pin)(ip)
			
 
				-#define IOP_UNPIN(ip, remove)	(*(ip)->li_ops->iop_unpin)(ip, remove)
			
 
				-#define IOP_PUSH(ip, list)	(*(ip)->li_ops->iop_push)(ip, list)
			
 
				-#define IOP_UNLOCK(ip)		(*(ip)->li_ops->iop_unlock)(ip)
			
 
				-#define IOP_COMMITTED(ip, lsn)	(*(ip)->li_ops->iop_committed)(ip, lsn)
			
 
				-#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
			
 
				-
			
 
				 /*
			
 
				- * Return values for the IOP_PUSH() routines.
			
 
				+ * Return values for the iop_push() routines.
			
 
				  */
			
 
				 #define XFS_ITEM_SUCCESS	0
			
 
				 #define XFS_ITEM_PINNED		1
			
@@ -446,7 +162,7 @@ typedef struct xfs_trans {
 
				 xfs_trans_t	*xfs_trans_alloc(struct xfs_mount *, uint);
			
 
				 xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
			
 
				 xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
			
 
				-int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
			
 
				+int		xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
			
 
				 				  uint, uint);
			
 
				 void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
			
 
				 
			
@@ -528,9 +244,4 @@ void		xfs_trans_ail_destroy(struct xfs_mount *);
 
				 extern kmem_zone_t	*xfs_trans_zone;
			
 
				 extern kmem_zone_t	*xfs_log_item_desc_zone;
			
 
				 
			
 
				-#endif	/* __KERNEL__ */
			
 
				-
			
 
				-void		xfs_trans_init(struct xfs_mount *);
			
 
				-int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
			
 
				-
			
 
				 #endif	/* __XFS_TRANS_H__ */