@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t			isize = i_size_read(inode);
 	size_t			count = iov_iter_count(to);
+	loff_t			end = iocb->ki_pos + count - 1;
 	struct iov_iter		data;
 	struct xfs_buftarg	*target;
 	ssize_t			ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
 
 	file_accessed(iocb->ki_filp);
 
-	/*
-	 * Locking is a bit tricky here. If we take an exclusive lock for direct
-	 * IO, we effectively serialise all new concurrent read IO to this file
-	 * and block it behind IO that is currently in progress because IO in
-	 * progress holds the IO lock shared. We only need to hold the lock
-	 * exclusive to blow away the page cache, so only take lock exclusively
-	 * if the page cache needs invalidation. This allows the normal direct
-	 * IO case of no page cache pages to proceeed concurrently without
-	 * serialisation.
-	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 	if (mapping->nrpages) {
-		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+		if (ret)
+			goto out_unlock;
 
 		/*
-		 * The generic dio code only flushes the range of the particular
-		 * I/O. Because we take an exclusive lock here, this whole
-		 * sequence is considerably more expensive for us. This has a
-		 * noticeable performance impact for any file with cached pages,
-		 * even when outside of the range of the particular I/O.
-		 *
-		 * Hence, amortize the cost of the lock against a full file
-		 * flush and reduce the chances of repeated iolock cycles going
-		 * forward.
+		 * Invalidate whole pages. This can return an error if we fail
+		 * to invalidate a page, but this should never happen on XFS.
+		 * Warn if it does fail.
 		 */
-		if (mapping->nrpages) {
-			ret = filemap_write_and_wait(mapping);
-			if (ret) {
-				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-				return ret;
-			}
-
-			/*
-			 * Invalidate whole pages. This can return an error if
-			 * we fail to invalidate a page, but this should never
-			 * happen on XFS. Warn if it does fail.
-			 */
-			ret = invalidate_inode_pages2(mapping);
-			WARN_ON_ONCE(ret);
-			ret = 0;
-		}
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+		ret = 0;
 	}
 
 	data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
 	}
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+out_unlock:
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
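To make the resulting control flow easier to follow, here is the read-side page
cache handling as it looks with the hunks above applied. This is a hand-assembled
sketch, not a verbatim excerpt: the variables (ip, mapping, count, ret) come from
the surrounding function context shown in the first hunk, and the direct I/O
submission between the hunks is elided.

	/* Sketch of xfs_file_dio_aio_read() after this change (reconstructed). */
	loff_t	end = iocb->ki_pos + count - 1;

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if (mapping->nrpages) {
		/* Flush and invalidate only the range covered by this I/O. */
		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
		if (ret)
			goto out_unlock;

		ret = invalidate_inode_pages2_range(mapping,
				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
		WARN_ON_ONCE(ret);
		ret = 0;
	}

	/* ... submit the direct I/O and advance iocb->ki_pos ... */

out_unlock:
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;

The shared iolock is now held across the whole sequence: the invalidation no
longer cycles up to XFS_IOLOCK_EXCL, and the writeback is bounded to the byte
range of the request instead of the whole file.
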
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
 	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + count) & mp->m_blockmask))
-		unaligned_io = 1;
-
 	/*
-	 * We don't need to take an exclusive lock unless there page cache needs
-	 * to be invalidated or unaligned IO is being executed. We don't need to
-	 * consider the EOF extension case here because
-	 * xfs_file_aio_write_checks() will relock the inode as necessary for
-	 * EOF zeroing cases and fill out the new inode size as appropriate.
+	 * Don't take the exclusive iolock here unless the I/O is unaligned to
+	 * the file system block size.  We don't need to consider the EOF
+	 * extension case here because xfs_file_aio_write_checks() will relock
+	 * the inode as necessary for EOF zeroing cases and fill out the new
+	 * inode size as appropriate.
 	 */
-	if (unaligned_io || mapping->nrpages)
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
+		unaligned_io = 1;
 		iolock = XFS_IOLOCK_EXCL;
-	else
+	} else {
 		iolock = XFS_IOLOCK_SHARED;
-	xfs_rw_ilock(ip, iolock);
-
-	/*
-	 * Recheck if there are cached pages that need invalidate after we got
-	 * the iolock to protect against other threads adding new pages while
-	 * we were waiting for the iolock.
-	 */
-	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-		xfs_rw_iunlock(ip, iolock);
-		iolock = XFS_IOLOCK_EXCL;
-		xfs_rw_ilock(ip, iolock);
 	}
 
+	xfs_rw_ilock(ip, iolock);
+
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
 	end = iocb->ki_pos + count - 1;
 
-	/*
-	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-	 */
 	if (mapping->nrpages) {
-		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
 		if (ret)
 			goto out;
+
 		/*
 		 * Invalidate whole pages. This can return an error if we fail
 		 * to invalidate a page, but this should never happen on XFS.
 		 * Warn if it does fail.
 		 */
-		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+		ret = invalidate_inode_pages2_range(mapping,
+				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
-	 * otherwise demote the lock if we had to flush cached pages
+	 * otherwise demote the lock if we had to take the exclusive lock
+	 * for other reasons in xfs_file_aio_write_checks.
 	 */
 	if (unaligned_io)
 		inode_dio_wait(inode);
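
The write side makes the equivalent range-based change and additionally folds
the "unaligned" check into the lock-mode selection. A condensed sketch of the
resulting logic (reconstructed from the hunk above, not a verbatim excerpt;
the flush/invalidate and submission steps are elided):

	/* Block-unaligned direct writes still need exclusive serialisation. */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	xfs_rw_ilock(ip, iolock);

	/*
	 * ... xfs_file_aio_write_checks(), then the same range-bounded
	 * filemap_write_and_wait_range()/invalidate_inode_pages2_range()
	 * sequence as on the read side, then inode_dio_wait() for
	 * unaligned I/O ...
	 */

Cached pages alone no longer force the exclusive iolock here, and the recheck
of mapping->nrpages after taking the iolock is dropped because page cache
state no longer determines the lock mode.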