@@ -37,9 +37,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
-STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
-				struct xfs_perag *pag, struct xfs_inode *ip);
-
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
 	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct xfs_inode *ip = XFS_I(inode);
 
-	kmem_zone_free(xfs_inode_zone, ip);
-}
-
-void
-xfs_inode_free(
-	struct xfs_inode *ip)
-{
 	switch (VFS_I(ip)->i_mode & S_IFMT) {
 	case S_IFREG:
 	case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
 		ip->i_itemp = NULL;
 	}
 
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
+static void
+__xfs_inode_free(
+	struct xfs_inode *ip)
+{
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!xfs_isiflocked(ip));
+	XFS_STATS_DEC(ip->i_mount, vn_active);
+
+	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
+void
+xfs_inode_free(
+	struct xfs_inode *ip)
+{
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
 	 * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
 	ip->i_ino = 0;
 	spin_unlock(&ip->i_flags_lock);
 
-	/* asserts to verify all state is correct here */
-	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
-	XFS_STATS_DEC(ip->i_mount, vn_active);
+	__xfs_inode_free(ip);
+}
 
-	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs periodic sync default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_reclaim_work_queue(
+	struct xfs_mount *mp)
+{
+
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_reclaim_work_queue(mp);
+}
+
+static void
+xfs_perag_set_reclaim_tag(
+	struct xfs_perag *pag)
+{
+	struct xfs_mount *mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (pag->pag_ici_reclaimable++)
+		return;
+
+	/* propagate the reclaim tag up into the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
+			   XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+
+	/* schedule periodic background inode reclaim */
+	xfs_reclaim_work_queue(mp);
+
+	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+static void
+xfs_perag_clear_reclaim_tag(
+	struct xfs_perag *pag)
+{
+	struct xfs_mount *mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (--pag->pag_ici_reclaimable)
+		return;
+
+	/* clear the reclaim tag from the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
+			     XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+	struct xfs_inode *ip)
+{
+	struct xfs_mount *mp = ip->i_mount;
+	struct xfs_perag *pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+
+	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+	xfs_perag_set_reclaim_tag(pag);
+	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+STATIC void
+xfs_inode_clear_reclaim_tag(
+	struct xfs_perag *pag,
+	xfs_ino_t ino)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
+			     XFS_ICI_RECLAIM_TAG);
+	xfs_perag_clear_reclaim_tag(pag);
 }
 
 /*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
 		 */
 		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 		ip->i_flags |= XFS_INEW;
-		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
+		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
 		inode->i_state = I_NEW;
 
 		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -722,121 +842,6 @@ xfs_inode_ag_iterator_tag(
 	return last_error;
 }
 
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_reclaim_work_queue(
-	struct xfs_mount *mp)
-{
-
-	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
-			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-	}
-	rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-void
-xfs_reclaim_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_reclaim_work);
-
-	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-	xfs_reclaim_work_queue(mp);
-}
-
-static void
-__xfs_inode_set_reclaim_tag(
-	struct xfs_perag *pag,
-	struct xfs_inode *ip)
-{
-	radix_tree_tag_set(&pag->pag_ici_root,
-			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-			   XFS_ICI_RECLAIM_TAG);
-
-	if (!pag->pag_ici_reclaimable) {
-		/* propagate the reclaim tag up into the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-
-		/* schedule periodic background inode reclaim */
-		xfs_reclaim_work_queue(ip->i_mount);
-
-		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-	pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-	xfs_inode_t *ip)
-{
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_perag *pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	__xfs_inode_set_reclaim_tag(pag, ip);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-	xfs_perag_t *pag,
-	xfs_inode_t *ip)
-{
-	pag->pag_ici_reclaimable--;
-	if (!pag->pag_ici_reclaimable) {
-		/* clear the reclaim tag from the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-}
-
-STATIC void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t *mp,
-	xfs_perag_t *pag,
-	xfs_inode_t *ip)
-{
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-	__xfs_inode_clear_reclaim(pag, ip);
-}
-
 /*
  * Grab the inode for reclaim exclusively.
  * Return 0 if we grabbed it, non-zero otherwise.
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
 	int sync_mode)
 {
 	struct xfs_buf *bp = NULL;
+	xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */
 	int error;
 
 restart:
@@ -993,6 +999,22 @@ restart:
 
 	xfs_iflock(ip);
 reclaim:
+	/*
+	 * Because we use RCU freeing we need to ensure the inode always appears
+	 * to be reclaimed with an invalid inode number when in the free state.
+	 * We do this as early as possible under the ILOCK and flush lock so
+	 * that xfs_iflush_cluster() can be guaranteed to detect races with us
+	 * here. By doing this, we guarantee that once xfs_iflush_cluster has
+	 * locked both the XFS_ILOCK and the flush lock that it will see either
+	 * a valid, flushable inode that will serialise correctly against the
+	 * locks below, or it will see a clean (and invalid) inode that it can
+	 * skip.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags = XFS_IRECLAIM;
+	ip->i_ino = 0;
+	spin_unlock(&ip->i_flags_lock);
+
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -1006,9 +1028,9 @@ reclaim:
 	 */
 	spin_lock(&pag->pag_ici_lock);
 	if (!radix_tree_delete(&pag->pag_ici_root,
-				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+				XFS_INO_TO_AGINO(ip->i_mount, ino)))
 		ASSERT(0);
-	__xfs_inode_clear_reclaim(pag, ip);
+	xfs_perag_clear_reclaim_tag(pag);
 	spin_unlock(&pag->pag_ici_lock);
 
 	/*
@@ -1023,7 +1045,7 @@ reclaim:
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-	xfs_inode_free(ip);
+	__xfs_inode_free(ip);
 	return error;
 
 out_ifunlock: