@@ -284,7 +284,7 @@ xfs_file_read_iter(
 	if (file->f_mode & FMODE_NOCMTIME)
 		ioflags |= XFS_IO_INVIS;
 
-	if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
+	if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
 		xfs_buftarg_t	*target =
 			XFS_IS_REALTIME_INODE(ip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -378,7 +378,11 @@ xfs_file_splice_read(
 
 	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
 
-	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+	/* for dax, we need to avoid the page cache */
+	if (IS_DAX(VFS_I(ip)))
+		ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
+	else
+		ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
@@ -672,7 +676,7 @@ xfs_file_dio_aio_write(
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
-	if ((pos | count) & target->bt_logical_sectormask)
+	if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
 		return -EINVAL;
 
 	/* "unaligned" here means not aligned to a filesystem block */
@@ -758,8 +762,11 @@ xfs_file_dio_aio_write(
 out:
 	xfs_rw_iunlock(ip, iolock);
 
-	/* No fallback to buffered IO on errors for XFS. */
-	ASSERT(ret < 0 || ret == count);
+	/*
+	 * No fallback to buffered IO on errors for XFS. DAX can result in
+	 * partial writes, but direct IO will either complete fully or fail.
+	 */
+	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
 	return ret;
 }
 
@@ -842,7 +849,7 @@ xfs_file_write_iter(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if (unlikely(iocb->ki_flags & IOCB_DIRECT))
+	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
 		ret = xfs_file_dio_aio_write(iocb, from);
 	else
 		ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1063,17 +1070,6 @@ xfs_file_readdir(
 	return xfs_readdir(ip, ctx, bufsize);
 }
 
-STATIC int
-xfs_file_mmap(
-	struct file	*filp,
-	struct vm_area_struct *vma)
-{
-	vma->vm_ops = &xfs_file_vm_ops;
-
-	file_accessed(filp);
-	return 0;
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
@@ -1454,26 +1450,11 @@ xfs_file_llseek(
  * ordering of:
  *
  * mmap_sem (MM)
- *   i_mmap_lock (XFS - truncate serialisation)
- *     page_lock (MM)
- *       i_lock (XFS - extent map serialisation)
+ *   sb_start_pagefault(vfs, freeze)
+ *     i_mmap_lock (XFS - truncate serialisation)
+ *       page_lock (MM)
+ *         i_lock (XFS - extent map serialisation)
  */
-STATIC int
-xfs_filemap_fault(
-	struct vm_area_struct	*vma,
-	struct vm_fault		*vmf)
-{
-	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
-	int			error;
-
-	trace_xfs_filemap_fault(ip);
-
-	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
-	error = filemap_fault(vma, vmf);
-	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
-
-	return error;
-}
 
 /*
  * mmap()d file has taken write protection fault and is being made writable. We
@@ -1486,21 +1467,66 @@ xfs_filemap_page_mkwrite(
 	struct vm_area_struct	*vma,
 	struct vm_fault		*vmf)
 {
-	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
+	struct inode		*inode = file_inode(vma->vm_file);
 	int			ret;
 
-	trace_xfs_filemap_page_mkwrite(ip);
+	trace_xfs_filemap_page_mkwrite(XFS_I(inode));
 
-	sb_start_pagefault(VFS_I(ip)->i_sb);
+	sb_start_pagefault(inode->i_sb);
 	file_update_time(vma->vm_file);
-	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+
+	if (IS_DAX(inode)) {
+		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
+				    xfs_end_io_dax_write);
+	} else {
+		ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = block_page_mkwrite_return(ret);
+	}
+
+	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+	sb_end_pagefault(inode->i_sb);
+
+	return ret;
+}
+
+STATIC int
+xfs_filemap_fault(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	struct xfs_inode	*ip = XFS_I(file_inode(vma->vm_file));
+	int			ret;
 
-	ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+	trace_xfs_filemap_fault(ip);
+
+	/* DAX can shortcut the normal fault path on write faults! */
+	if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
+		return xfs_filemap_page_mkwrite(vma, vmf);
 
+	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+	ret = filemap_fault(vma, vmf);
 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
-	sb_end_pagefault(VFS_I(ip)->i_sb);
 
-	return block_page_mkwrite_return(ret);
+	return ret;
+}
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+	.fault		= xfs_filemap_fault,
+	.map_pages	= filemap_map_pages,
+	.page_mkwrite	= xfs_filemap_page_mkwrite,
+};
+
+STATIC int
+xfs_file_mmap(
+	struct file	*filp,
+	struct vm_area_struct *vma)
+{
+	file_accessed(filp);
+	vma->vm_ops = &xfs_file_vm_ops;
+	if (IS_DAX(file_inode(filp)))
+		vma->vm_flags |= VM_MIXEDMAP;
+	return 0;
+}
 
 const struct file_operations xfs_file_operations = {
@@ -1531,9 +1557,3 @@ const struct file_operations xfs_dir_file_operations = {
 #endif
 	.fsync		= xfs_dir_fsync,
 };
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-	.fault		= xfs_filemap_fault,
-	.map_pages	= filemap_map_pages,
-	.page_mkwrite	= xfs_filemap_page_mkwrite,
-};