|
@@ -79,14 +79,15 @@ xfs_rw_ilock_demote(
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * xfs_iozero
|
|
|
|
|
|
+ * xfs_iozero clears the specified range supplied via the page cache (except in
|
|
|
|
+ * the DAX case). Writes through the page cache will allocate blocks over holes,
|
|
|
|
+ * though the callers usually map the holes first and avoid them. If a block is
|
|
|
|
+ * not completely zeroed, then it will be read from disk before being partially
|
|
|
|
+ * zeroed.
|
|
*
|
|
*
|
|
- * xfs_iozero clears the specified range of buffer supplied,
|
|
|
|
- * and marks all the affected blocks as valid and modified. If
|
|
|
|
- * an affected block is not allocated, it will be allocated. If
|
|
|
|
- * an affected block is not completely overwritten, and is not
|
|
|
|
- * valid before the operation, it will be read from disk before
|
|
|
|
- * being partially zeroed.
|
|
|
|
|
|
+ * In the DAX case, we can just directly write to the underlying pages. This
|
|
|
|
+ * will not allocate blocks, but will avoid holes and unwritten extents and so
|
|
|
|
+ * not do unnecessary work.
|
|
*/
|
|
*/
|
|
int
|
|
int
|
|
xfs_iozero(
|
|
xfs_iozero(
|
|
@@ -96,7 +97,8 @@ xfs_iozero(
|
|
{
|
|
{
|
|
struct page *page;
|
|
struct page *page;
|
|
struct address_space *mapping;
|
|
struct address_space *mapping;
|
|
- int status;
|
|
|
|
|
|
+ int status = 0;
|
|
|
|
+
|
|
|
|
|
|
mapping = VFS_I(ip)->i_mapping;
|
|
mapping = VFS_I(ip)->i_mapping;
|
|
do {
|
|
do {
|
|
@@ -108,20 +110,27 @@ xfs_iozero(
|
|
if (bytes > count)
|
|
if (bytes > count)
|
|
bytes = count;
|
|
bytes = count;
|
|
|
|
|
|
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
|
|
|
|
- AOP_FLAG_UNINTERRUPTIBLE,
|
|
|
|
- &page, &fsdata);
|
|
|
|
- if (status)
|
|
|
|
- break;
|
|
|
|
|
|
+ if (IS_DAX(VFS_I(ip))) {
|
|
|
|
+ status = dax_zero_page_range(VFS_I(ip), pos, bytes,
|
|
|
|
+ xfs_get_blocks_direct);
|
|
|
|
+ if (status)
|
|
|
|
+ break;
|
|
|
|
+ } else {
|
|
|
|
+ status = pagecache_write_begin(NULL, mapping, pos, bytes,
|
|
|
|
+ AOP_FLAG_UNINTERRUPTIBLE,
|
|
|
|
+ &page, &fsdata);
|
|
|
|
+ if (status)
|
|
|
|
+ break;
|
|
|
|
|
|
- zero_user(page, offset, bytes);
|
|
|
|
|
|
+ zero_user(page, offset, bytes);
|
|
|
|
|
|
- status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
|
|
|
|
- page, fsdata);
|
|
|
|
- WARN_ON(status <= 0); /* can't return less than zero! */
|
|
|
|
|
|
+ status = pagecache_write_end(NULL, mapping, pos, bytes,
|
|
|
|
+ bytes, page, fsdata);
|
|
|
|
+ WARN_ON(status <= 0); /* can't return less than zero! */
|
|
|
|
+ status = 0;
|
|
|
|
+ }
|
|
pos += bytes;
|
|
pos += bytes;
|
|
count -= bytes;
|
|
count -= bytes;
|
|
- status = 0;
|
|
|
|
} while (count);
|
|
} while (count);
|
|
|
|
|
|
return status;
|
|
return status;
|
|
@@ -284,7 +293,7 @@ xfs_file_read_iter(
|
|
if (file->f_mode & FMODE_NOCMTIME)
|
|
if (file->f_mode & FMODE_NOCMTIME)
|
|
ioflags |= XFS_IO_INVIS;
|
|
ioflags |= XFS_IO_INVIS;
|
|
|
|
|
|
- if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
|
|
|
|
|
|
+ if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
|
|
xfs_buftarg_t *target =
|
|
xfs_buftarg_t *target =
|
|
XFS_IS_REALTIME_INODE(ip) ?
|
|
XFS_IS_REALTIME_INODE(ip) ?
|
|
mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
@@ -378,7 +387,11 @@ xfs_file_splice_read(
|
|
|
|
|
|
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
|
|
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
|
|
|
|
|
|
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
|
|
|
|
|
|
+ /* For DAX, we need to avoid the page cache. */
|
|
|
|
+ if (IS_DAX(VFS_I(ip)))
|
|
|
|
+ ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
|
|
|
|
+ else
|
|
|
|
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
|
|
if (ret > 0)
|
|
if (ret > 0)
|
|
XFS_STATS_ADD(xs_read_bytes, ret);
|
|
XFS_STATS_ADD(xs_read_bytes, ret);
|
|
|
|
|
|
@@ -672,7 +685,7 @@ xfs_file_dio_aio_write(
|
|
mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
mp->m_rtdev_targp : mp->m_ddev_targp;
|
|
|
|
|
|
/* DIO must be aligned to device logical sector size */
|
|
/* DIO must be aligned to device logical sector size */
|
|
- if ((pos | count) & target->bt_logical_sectormask)
|
|
|
|
|
|
+ if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
/* "unaligned" here means not aligned to a filesystem block */
|
|
/* "unaligned" here means not aligned to a filesystem block */
|
|
@@ -758,8 +771,11 @@ xfs_file_dio_aio_write(
|
|
out:
|
|
out:
|
|
xfs_rw_iunlock(ip, iolock);
|
|
xfs_rw_iunlock(ip, iolock);
|
|
|
|
|
|
- /* No fallback to buffered IO on errors for XFS. */
|
|
|
|
- ASSERT(ret < 0 || ret == count);
|
|
|
|
|
|
+ /*
|
|
|
|
+ * No fallback to buffered IO on errors for XFS. DAX can result in
|
|
|
|
+ * partial writes, but direct IO will either complete fully or fail.
|
|
|
|
+ */
|
|
|
|
+ ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -842,7 +858,7 @@ xfs_file_write_iter(
|
|
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
|
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
|
return -EIO;
|
|
return -EIO;
|
|
|
|
|
|
- if (unlikely(iocb->ki_flags & IOCB_DIRECT))
|
|
|
|
|
|
+ if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
|
|
ret = xfs_file_dio_aio_write(iocb, from);
|
|
ret = xfs_file_dio_aio_write(iocb, from);
|
|
else
|
|
else
|
|
ret = xfs_file_buffered_aio_write(iocb, from);
|
|
ret = xfs_file_buffered_aio_write(iocb, from);
|
|
@@ -1063,17 +1079,6 @@ xfs_file_readdir(
|
|
return xfs_readdir(ip, ctx, bufsize);
|
|
return xfs_readdir(ip, ctx, bufsize);
|
|
}
|
|
}
|
|
|
|
|
|
-STATIC int
|
|
|
|
-xfs_file_mmap(
|
|
|
|
- struct file *filp,
|
|
|
|
- struct vm_area_struct *vma)
|
|
|
|
-{
|
|
|
|
- vma->vm_ops = &xfs_file_vm_ops;
|
|
|
|
-
|
|
|
|
- file_accessed(filp);
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* This type is designed to indicate the type of offset we would like
|
|
* This type is designed to indicate the type of offset we would like
|
|
* to search from page cache for xfs_seek_hole_data().
|
|
* to search from page cache for xfs_seek_hole_data().
|
|
@@ -1454,48 +1459,83 @@ xfs_file_llseek(
|
|
* ordering of:
|
|
* ordering of:
|
|
*
|
|
*
|
|
* mmap_sem (MM)
|
|
* mmap_sem (MM)
|
|
- * i_mmap_lock (XFS - truncate serialisation)
|
|
|
|
- * page_lock (MM)
|
|
|
|
- * i_lock (XFS - extent map serialisation)
|
|
|
|
|
|
+ * sb_start_pagefault(vfs, freeze)
|
|
|
|
+ * i_mmap_lock (XFS - truncate serialisation)
|
|
|
|
+ * page_lock (MM)
|
|
|
|
+ * i_lock (XFS - extent map serialisation)
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * An mmap()d file has taken a write protection fault and is being made writable. We
|
|
|
|
+ * can set the page state up correctly for a writable page, which means we can
|
|
|
|
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
|
|
|
|
+ * mapping.
|
|
*/
|
|
*/
|
|
STATIC int
|
|
STATIC int
|
|
-xfs_filemap_fault(
|
|
|
|
|
|
+xfs_filemap_page_mkwrite(
|
|
struct vm_area_struct *vma,
|
|
struct vm_area_struct *vma,
|
|
struct vm_fault *vmf)
|
|
struct vm_fault *vmf)
|
|
{
|
|
{
|
|
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
|
|
|
|
- int error;
|
|
|
|
|
|
+ struct inode *inode = file_inode(vma->vm_file);
|
|
|
|
+ int ret;
|
|
|
|
|
|
- trace_xfs_filemap_fault(ip);
|
|
|
|
|
|
+ trace_xfs_filemap_page_mkwrite(XFS_I(inode));
|
|
|
|
|
|
- xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
|
|
|
- error = filemap_fault(vma, vmf);
|
|
|
|
- xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
|
|
|
|
|
+ sb_start_pagefault(inode->i_sb);
|
|
|
|
+ file_update_time(vma->vm_file);
|
|
|
|
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
|
|
|
|
|
- return error;
|
|
|
|
|
|
+ if (IS_DAX(inode)) {
|
|
|
|
+ ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
|
|
|
|
+ xfs_end_io_dax_write);
|
|
|
|
+ } else {
|
|
|
|
+ ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
|
|
|
+ ret = block_page_mkwrite_return(ret);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
|
|
|
+ sb_end_pagefault(inode->i_sb);
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * mmap()d file has taken write protection fault and is being made writable. We
|
|
|
|
- * can set the page state up correctly for a writable page, which means we can
|
|
|
|
- * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
|
|
|
|
- * mapping.
|
|
|
|
- */
|
|
|
|
STATIC int
|
|
STATIC int
|
|
-xfs_filemap_page_mkwrite(
|
|
|
|
|
|
+xfs_filemap_fault(
|
|
struct vm_area_struct *vma,
|
|
struct vm_area_struct *vma,
|
|
struct vm_fault *vmf)
|
|
struct vm_fault *vmf)
|
|
{
|
|
{
|
|
- struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
|
|
|
|
- int error;
|
|
|
|
|
|
+ struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ trace_xfs_filemap_fault(ip);
|
|
|
|
|
|
- trace_xfs_filemap_page_mkwrite(ip);
|
|
|
|
|
|
+ /* DAX can shortcut the normal fault path on write faults! */
|
|
|
|
+ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
|
|
|
|
+ return xfs_filemap_page_mkwrite(vma, vmf);
|
|
|
|
|
|
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
|
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
|
- error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
|
|
|
|
|
+ ret = filemap_fault(vma, vmf);
|
|
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
|
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
|
|
|
|
|
- return error;
|
|
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static const struct vm_operations_struct xfs_file_vm_ops = {
|
|
|
|
+ .fault = xfs_filemap_fault,
|
|
|
|
+ .map_pages = filemap_map_pages,
|
|
|
|
+ .page_mkwrite = xfs_filemap_page_mkwrite,
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+STATIC int
|
|
|
|
+xfs_file_mmap(
|
|
|
|
+ struct file *filp,
|
|
|
|
+ struct vm_area_struct *vma)
|
|
|
|
+{
|
|
|
|
+ file_accessed(filp);
|
|
|
|
+ vma->vm_ops = &xfs_file_vm_ops;
|
|
|
|
+ if (IS_DAX(file_inode(filp)))
|
|
|
|
+ vma->vm_flags |= VM_MIXEDMAP;
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
const struct file_operations xfs_file_operations = {
|
|
const struct file_operations xfs_file_operations = {
|
|
@@ -1526,9 +1566,3 @@ const struct file_operations xfs_dir_file_operations = {
|
|
#endif
|
|
#endif
|
|
.fsync = xfs_dir_fsync,
|
|
.fsync = xfs_dir_fsync,
|
|
};
|
|
};
|
|
-
|
|
|
|
-static const struct vm_operations_struct xfs_file_vm_ops = {
|
|
|
|
- .fault = xfs_filemap_fault,
|
|
|
|
- .map_pages = filemap_map_pages,
|
|
|
|
- .page_mkwrite = xfs_filemap_page_mkwrite,
|
|
|
|
-};
|
|
|