@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t			isize = i_size_read(inode);
 	size_t			count = iov_iter_count(to);
+	loff_t			end = iocb->ki_pos + count - 1;
 	struct iov_iter		data;
 	struct xfs_buftarg	*target;
 	ssize_t			ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
 
 	file_accessed(iocb->ki_filp);
 
-	/*
-	 * Locking is a bit tricky here. If we take an exclusive lock for direct
-	 * IO, we effectively serialise all new concurrent read IO to this file
-	 * and block it behind IO that is currently in progress because IO in
-	 * progress holds the IO lock shared. We only need to hold the lock
-	 * exclusive to blow away the page cache, so only take lock exclusively
-	 * if the page cache needs invalidation. This allows the normal direct
-	 * IO case of no page cache pages to proceeed concurrently without
-	 * serialisation.
-	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 	if (mapping->nrpages) {
-		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		if (ret)
+			goto out_unlock;
 
 		/*
-		 * The generic dio code only flushes the range of the particular
-		 * I/O. Because we take an exclusive lock here, this whole
-		 * sequence is considerably more expensive for us. This has a
-		 * noticeable performance impact for any file with cached pages,
-		 * even when outside of the range of the particular I/O.
-		 *
-		 * Hence, amortize the cost of the lock against a full file
-		 * flush and reduce the chances of repeated iolock cycles going
-		 * forward.
+		 * Invalidate whole pages. This can return an error if we fail
+		 * to invalidate a page, but this should never happen on XFS.
+		 * Warn if it does fail.
 		 */
-		if (mapping->nrpages) {
-			ret = filemap_write_and_wait(mapping);
-			if (ret) {
-				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-				return ret;
-			}
-
-			/*
-			 * Invalidate whole pages. This can return an error if
-			 * we fail to invalidate a page, but this should never
-			 * happen on XFS. Warn if it does fail.
-			 */
-			ret = invalidate_inode_pages2(mapping);
-			WARN_ON_ONCE(ret);
-			ret = 0;
-		}
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+		ret = 0;
 	}
 
 	data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
 	}
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+out_unlock:
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
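To make the resulting control flow easier to follow, here is the read-side page
cache handling as it looks with the hunks above applied. This is a hand-assembled
sketch, not a verbatim excerpt: the variables (ip, mapping, count, ret) come from
the surrounding function context shown in the first hunk, and the direct I/O
submission between the hunks is elided.

	/* Sketch of xfs_file_dio_aio_read() after this change (reconstructed). */
	loff_t	end = iocb->ki_pos + count - 1;

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if (mapping->nrpages) {
		/* Flush and invalidate only the range covered by this I/O. */
		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
		if (ret)
			goto out_unlock;

		ret = invalidate_inode_pages2_range(mapping,
				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
		WARN_ON_ONCE(ret);
		ret = 0;
	}

	/* ... submit the direct I/O and advance iocb->ki_pos ... */

out_unlock:
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;

The shared iolock is now held across the whole sequence: the invalidation no
longer cycles up to XFS_IOLOCK_EXCL, and the writeback is bounded to the byte
range of the request instead of the whole file.
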
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
 	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + count) & mp->m_blockmask))
-		unaligned_io = 1;
-
 	/*
-	 * We don't need to take an exclusive lock unless there page cache needs
-	 * to be invalidated or unaligned IO is being executed. We don't need to
-	 * consider the EOF extension case here because
-	 * xfs_file_aio_write_checks() will relock the inode as necessary for
-	 * EOF zeroing cases and fill out the new inode size as appropriate.
+	 * Don't take the exclusive iolock here unless the I/O is unaligned to
+	 * the file system block size.  We don't need to consider the EOF
+	 * extension case here because xfs_file_aio_write_checks() will relock
+	 * the inode as necessary for EOF zeroing cases and fill out the new
+	 * inode size as appropriate.
 	 */
-	if (unaligned_io || mapping->nrpages)
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
+		unaligned_io = 1;
 		iolock = XFS_IOLOCK_EXCL;
-	else
+	} else {
 		iolock = XFS_IOLOCK_SHARED;
-	xfs_rw_ilock(ip, iolock);
-
-	/*
-	 * Recheck if there are cached pages that need invalidate after we got
-	 * the iolock to protect against other threads adding new pages while
-	 * we were waiting for the iolock.
-	 */
-	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-		xfs_rw_iunlock(ip, iolock);
-		iolock = XFS_IOLOCK_EXCL;
-		xfs_rw_ilock(ip, iolock);
 	}
 
+	xfs_rw_ilock(ip, iolock);
+
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
 	end = iocb->ki_pos + count - 1;
 
-	/*
-	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-	 */
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
 		if (ret)
 			goto out;
+
 		/*
 		 * Invalidate whole pages. This can return an error if we fail
 		 * to invalidate a page, but this should never happen on XFS.
 		 * Warn if it does fail.
 		 */
-		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
-	 * otherwise demote the lock if we had to flush cached pages
+	 * otherwise demote the lock if we had to take the exclusive lock
+	 * for other reasons in xfs_file_aio_write_checks.
 	 */
 	if (unaligned_io)
 		inode_dio_wait(inode);
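
The write side makes the equivalent range-based change and additionally folds
the "unaligned" check into the lock-mode selection. A condensed sketch of the
resulting logic (reconstructed from the hunk above, not a verbatim excerpt;
the flush/invalidate and submission steps are elided):

	/* Block-unaligned direct writes still need exclusive serialisation. */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	xfs_rw_ilock(ip, iolock);

	/*
	 * ... xfs_file_aio_write_checks(), then the same range-bounded
	 * filemap_write_and_wait_range()/invalidate_inode_pages2_range()
	 * sequence as on the read side, then inode_dio_wait() for
	 * unaligned I/O ...
	 */

Cached pages alone no longer force the exclusive iolock here, and the recheck
of mapping->nrpages after taking the iolock is dropped because page cache
state no longer determines the lock mode.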