@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
 }
 
 /*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
 */
 int
-xfs_iozero(
- struct xfs_inode *ip, /* inode */
- loff_t pos, /* offset in file */
- size_t count) /* size of data to zero */
+xfs_zero_range(
+ struct xfs_inode *ip,
+ xfs_off_t pos,
+ xfs_off_t count,
+ bool *did_zero)
 {
- struct page *page;
- struct address_space *mapping;
- int status = 0;
-
-
- mapping = VFS_I(ip)->i_mapping;
- do {
- unsigned offset, bytes;
- void *fsdata;
-
- offset = (pos & (PAGE_SIZE -1)); /* Within page */
- bytes = PAGE_SIZE - offset;
- if (bytes > count)
- bytes = count;
-
- if (IS_DAX(VFS_I(ip))) {
- status = dax_zero_page_range(VFS_I(ip), pos, bytes,
- xfs_get_blocks_direct);
- if (status)
- break;
- } else {
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
-
- zero_user(page, offset, bytes);
-
- status = pagecache_write_end(NULL, mapping, pos, bytes,
- bytes, page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
- status = 0;
- }
- pos += bytes;
- count -= bytes;
- } while (count);
-
- return status;
+ return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
 }
 
 int
@@ -282,48 +239,35 @@ xfs_file_fsync(
 }
 
 STATIC ssize_t
-xfs_file_read_iter(
+xfs_file_dio_aio_read(
 struct kiocb *iocb,
 struct iov_iter *to)
 {
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
 struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- size_t size = iov_iter_count(to);
+ loff_t isize = i_size_read(inode);
+ size_t count = iov_iter_count(to);
+ struct iov_iter data;
+ struct xfs_buftarg *target;
 ssize_t ret = 0;
- int ioflags = 0;
- xfs_fsize_t n;
- loff_t pos = iocb->ki_pos;
 
- XFS_STATS_INC(mp, xs_read_calls);
-
- if (unlikely(iocb->ki_flags & IOCB_DIRECT))
- ioflags |= XFS_IO_ISDIRECT;
- if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= XFS_IO_INVIS;
-
- if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
- /* DIO must be aligned to device logical sector size */
- if ((pos | size) & target->bt_logical_sectormask) {
- if (pos == i_size_read(inode))
- return 0;
- return -EINVAL;
- }
- }
+ trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
- n = mp->m_super->s_maxbytes - pos;
- if (n <= 0 || size == 0)
- return 0;
+ if (!count)
+ return 0; /* skip atime */
 
- if (n < size)
- size = n;
+ if (XFS_IS_REALTIME_INODE(ip))
+ target = ip->i_mount->m_rtdev_targp;
+ else
+ target = ip->i_mount->m_ddev_targp;
 
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
+ /* DIO must be aligned to device logical sector size */
+ if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
+ if (iocb->ki_pos == isize)
+ return 0;
+ return -EINVAL;
+ }
 
 /*
 * Locking is a bit tricky here. If we take an exclusive lock for direct
@@ -336,7 +280,7 @@ xfs_file_read_iter(
 * serialisation.
 */
 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ if (mapping->nrpages) {
 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
@@ -351,8 +295,8 @@ xfs_file_read_iter(
 * flush and reduce the chances of repeated iolock cycles going
 * forward.
 */
- if (inode->i_mapping->nrpages) {
- ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (mapping->nrpages) {
+ ret = filemap_write_and_wait(mapping);
 if (ret) {
 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 return ret;
@@ -363,20 +307,95 @@
 * we fail to invalidate a page, but this should never
 * happen on XFS. Warn if it does fail.
 */
- ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+ ret = invalidate_inode_pages2(mapping);
 WARN_ON_ONCE(ret);
 ret = 0;
 }
 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 }
 
- trace_xfs_file_read(ip, size, pos, ioflags);
+ data = *to;
+ ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+ xfs_get_blocks_direct, NULL, NULL, 0);
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ iov_iter_advance(to, ret);
+ }
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+
+static noinline ssize_t
+xfs_file_dax_read(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct iov_iter data = *to;
+ size_t count = iov_iter_count(to);
+ ssize_t ret = 0;
+
+ trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
+
+ if (!count)
+ return 0; /* skip atime */
+
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ iov_iter_advance(to, ret);
+ }
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_read(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
+ ssize_t ret;
+
+ trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
+
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 ret = generic_file_read_iter(iocb, to);
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ return ret;
+}
+
+STATIC ssize_t
+xfs_file_read_iter(
+ struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
+ ssize_t ret = 0;
+
+ XFS_STATS_INC(mp, xs_read_calls);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (IS_DAX(inode))
+ ret = xfs_file_dax_read(iocb, to);
+ else if (iocb->ki_flags & IOCB_DIRECT)
+ ret = xfs_file_dio_aio_read(iocb, to);
+ else
+ ret = xfs_file_buffered_aio_read(iocb, to);
+
 if (ret > 0)
 XFS_STATS_ADD(mp, xs_read_bytes, ret);
-
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 return ret;
 }
 
@@ -389,18 +408,14 @@ xfs_file_splice_read(
 unsigned int flags)
 {
 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- int ioflags = 0;
 ssize_t ret;
 
 XFS_STATS_INC(ip->i_mount, xs_read_calls);
 
- if (infilp->f_mode & FMODE_NOCMTIME)
- ioflags |= XFS_IO_INVIS;
-
 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 return -EIO;
 
- trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+ trace_xfs_file_splice_read(ip, count, *ppos);
 
 /*
 * DAX inodes cannot ues the page cache for splice, so we have to push
@@ -423,49 +438,6 @@ out:
 return ret;
 }
 
-/*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF. We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int /* error (positive) */
-xfs_zero_last_block(
- struct xfs_inode *ip,
- xfs_fsize_t offset,
- xfs_fsize_t isize,
- bool *did_zeroing)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
- int zero_offset = XFS_B_FSB_OFFSET(mp, isize);
- int zero_len;
- int nimaps = 1;
- int error = 0;
- struct xfs_bmbt_irec imap;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
- ASSERT(nimaps > 0);
-
- /*
- * If the block underlying isize is just a hole, then there
- * is nothing to zero.
- */
- if (imap.br_startblock == HOLESTARTBLOCK)
- return 0;
-
- zero_len = mp->m_sb.sb_blocksize - zero_offset;
- if (isize + zero_len > offset)
- zero_len = offset - isize;
- *did_zeroing = true;
- return xfs_iozero(ip, isize, zero_len);
-}
-
 /*
 * Zero any on disk space between the current EOF and the new, larger EOF.
 *
@@ -484,94 +456,11 @@ xfs_zero_eof(
 xfs_fsize_t isize, /* current inode size */
 bool *did_zeroing)
 {
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t start_zero_fsb;
- xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t zero_count_fsb;
- xfs_fileoff_t last_fsb;
- xfs_fileoff_t zero_off;
- xfs_fsize_t zero_len;
- int nimaps;
- int error = 0;
- struct xfs_bmbt_irec imap;
-
 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 ASSERT(offset > isize);
 
 trace_xfs_zero_eof(ip, isize, offset - isize);
-
- /*
- * First handle zeroing the block on which isize resides.
- *
- * We only zero a part of that block so it is handled specially.
- */
- if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
- if (error)
- return error;
- }
-
- /*
- * Calculate the range between the new size and the old where blocks
- * needing to be zeroed may exist.
- *
- * To get the block where the last byte in the file currently resides,
- * we need to subtract one from the size and truncate back to a block
- * boundary. We subtract 1 in case the size is exactly on a block
- * boundary.
- */
- last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
- start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
- ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
- if (last_fsb == end_zero_fsb) {
- /*
- * The size was only incremented on its last block.
- * We took care of that above, so just return.
- */
- return 0;
- }
-
- ASSERT(start_zero_fsb <= end_zero_fsb);
- while (start_zero_fsb <= end_zero_fsb) {
- nimaps = 1;
- zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
- &imap, &nimaps, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
- ASSERT(nimaps > 0);
-
- if (imap.br_state == XFS_EXT_UNWRITTEN ||
- imap.br_startblock == HOLESTARTBLOCK) {
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- continue;
- }
-
- /*
- * There are blocks we need to zero.
- */
- zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
- zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
- if ((zero_off + zero_len) > offset)
- zero_len = offset - zero_off;
-
- error = xfs_iozero(ip, zero_off, zero_len);
- if (error)
- return error;
-
- *did_zeroing = true;
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- }
-
- return 0;
+ return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
@@ -722,8 +611,7 @@ xfs_file_dio_aio_write(
 mp->m_rtdev_targp : mp->m_ddev_targp;
 
 /* DIO must be aligned to device logical sector size */
- if (!IS_DAX(inode) &&
- ((iocb->ki_pos | count) & target->bt_logical_sectormask))
+ if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 return -EINVAL;
 
 /* "unaligned" here means not aligned to a filesystem block */
@@ -762,7 +650,7 @@ xfs_file_dio_aio_write(
 end = iocb->ki_pos + count - 1;
 
 /*
- * See xfs_file_read_iter() for why we do a full-file flush here.
+ * See xfs_file_dio_aio_read() for why we do a full-file flush here.
 */
 if (mapping->nrpages) {
 ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
@@ -789,10 +677,12 @@ xfs_file_dio_aio_write(
 iolock = XFS_IOLOCK_SHARED;
 }
 
- trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+ trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 data = *from;
- ret = mapping->a_ops->direct_IO(iocb, &data);
+ ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+ xfs_get_blocks_direct, xfs_end_io_direct_write,
+ NULL, DIO_ASYNC_EXTEND);
 
 /* see generic_file_direct_write() for why this is necessary */
 if (mapping->nrpages) {
@@ -809,10 +699,70 @@ out:
 xfs_rw_iunlock(ip, iolock);
 
 /*
- * No fallback to buffered IO on errors for XFS. DAX can result in
- * partial writes, but direct IO will either complete fully or fail.
+ * No fallback to buffered IO on errors for XFS, direct IO will either
+ * complete fully or fail.
 */
- ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
+ ASSERT(ret < 0 || ret == count);
+ return ret;
+}
+
+static noinline ssize_t
+xfs_file_dax_write(
+ struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t ret = 0;
+ int unaligned_io = 0;
+ int iolock;
+ struct iov_iter data;
+
+ /* "unaligned" here means not aligned to a filesystem block */
+ if ((iocb->ki_pos & mp->m_blockmask) ||
+ ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
+ unaligned_io = 1;
+ iolock = XFS_IOLOCK_EXCL;
+ } else if (mapping->nrpages) {
+ iolock = XFS_IOLOCK_EXCL;
+ } else {
+ iolock = XFS_IOLOCK_SHARED;
+ }
+ xfs_rw_ilock(ip, iolock);
+
+ ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+ if (ret)
+ goto out;
+
+ /*
+ * Yes, even DAX files can have page cache attached to them: A zeroed
+ * page is inserted into the pagecache when we have to serve a write
+ * fault on a hole. It should never be dirtied and can simply be
+ * dropped from the pagecache once we get real data for the page.
+ */
+ if (mapping->nrpages) {
+ ret = invalidate_inode_pages2(mapping);
+ WARN_ON_ONCE(ret);
+ }
+
+ if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ iolock = XFS_IOLOCK_SHARED;
+ }
+
+ trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+
+ data = *from;
+ ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+ xfs_end_io_direct_write, 0);
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ iov_iter_advance(from, ret);
+ }
+out:
+ xfs_rw_iunlock(ip, iolock);
 return ret;
 }
 
@@ -839,9 +789,8 @@ xfs_file_buffered_aio_write(
 current->backing_dev_info = inode_to_bdi(inode);
 
 write_retry:
- trace_xfs_file_buffered_write(ip, iov_iter_count(from),
- iocb->ki_pos, 0);
- ret = generic_perform_write(file, from, iocb->ki_pos);
+ trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
+ ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 if (likely(ret >= 0))
 iocb->ki_pos += ret;
 
@@ -895,7 +844,9 @@ xfs_file_write_iter(
 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 return -EIO;
 
- if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+ if (IS_DAX(inode))
+ ret = xfs_file_dax_write(iocb, from);
+ else if (iocb->ki_flags & IOCB_DIRECT)
 ret = xfs_file_dio_aio_write(iocb, from);
 else
 ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1553,7 +1504,7 @@ xfs_filemap_page_mkwrite(
 if (IS_DAX(inode)) {
 ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 } else {
- ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+ ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 ret = block_page_mkwrite_return(ret);
 }
 