@@ -47,40 +47,6 @@
 static const struct vm_operations_struct xfs_file_vm_ops;
 
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	if (type & XFS_IOLOCK_EXCL)
-		inode_lock(VFS_I(ip));
-	xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_iunlock(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		inode_unlock(VFS_I(ip));
-}
-
-static inline void
-xfs_rw_ilock_demote(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_ilock_demote(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		inode_unlock(VFS_I(ip));
-}
-
 /*
  * Clear the specified ranges to zero through either the pagecache or DAX.
  * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
@@ -244,62 +210,21 @@ xfs_file_dio_aio_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	loff_t			isize = i_size_read(inode);
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
 	size_t			count = iov_iter_count(to);
-	loff_t			end = iocb->ki_pos + count - 1;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target;
-	ssize_t			ret = 0;
+	ssize_t			ret;
 
 	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
 	if (!count)
 		return 0; /* skip atime */
 
-	if (XFS_IS_REALTIME_INODE(ip))
-		target = ip->i_mount->m_rtdev_targp;
-	else
-		target = ip->i_mount->m_ddev_targp;
-
-	/* DIO must be aligned to device logical sector size */
-	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-		if (iocb->ki_pos == isize)
-			return 0;
-		return -EINVAL;
-	}
-
 	file_accessed(iocb->ki_filp);
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out_unlock;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
-
-	data = *to;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, NULL, NULL, 0);
-	if (ret >= 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
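+	/*
+	 * iomap_dio_rw() now performs the sector alignment checks and the
+	 * pagecache writeback and invalidation that were open coded above.
+	 */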
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-out_unlock:
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
@@ -317,9 +242,9 @@ xfs_file_dax_read(
 	if (!count)
 		return 0; /* skip atime */
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
 	return ret;
@@ -335,9 +260,9 @@ xfs_file_buffered_aio_read(
 
 	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
 
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	ret = generic_file_read_iter(iocb, to);
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	return ret;
 }
@@ -418,15 +343,18 @@ restart:
 	if (error <= 0)
 		return error;
 
-	error = xfs_break_layouts(inode, iolock, true);
+	error = xfs_break_layouts(inode, iolock);
 	if (error)
 		return error;
 
-	/* For changing security info in file_remove_privs() we need i_mutex */
+	/*
+	 * For changing security info in file_remove_privs() we need i_rwsem
+	 * exclusively.
+	 */
 	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
-		xfs_rw_iunlock(ip, *iolock);
+		xfs_iunlock(ip, *iolock);
 		*iolock = XFS_IOLOCK_EXCL;
-		xfs_rw_ilock(ip, *iolock);
+		xfs_ilock(ip, *iolock);
 		goto restart;
 	}
 	/*
@@ -451,9 +379,9 @@ restart:
 		spin_unlock(&ip->i_flags_lock);
 		if (!drained_dio) {
 			if (*iolock == XFS_IOLOCK_SHARED) {
-				xfs_rw_iunlock(ip, *iolock);
+				xfs_iunlock(ip, *iolock);
 				*iolock = XFS_IOLOCK_EXCL;
-				xfs_rw_ilock(ip, *iolock);
+				xfs_ilock(ip, *iolock);
 				iov_iter_reexpand(from, count);
 			}
 			/*
@@ -496,6 +424,58 @@ restart:
 	return 0;
 }
 
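+/*
+ * Completion handler for direct I/O writes, called by the iomap dio code
+ * once the data I/O has finished.
+ */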
+static int
+xfs_dio_write_end_io(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	bool			update_size = false;
+	int			error = 0;
+
+	trace_xfs_end_io_direct_write(ip, offset, size);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	if (size <= 0)
+		return size;
+
+	/*
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size. We
+	 * have no other method of updating EOF for AIO, so always do it here
+	 * if necessary.
+	 *
+	 * We need to lock the test/set EOF update as we can be racing with
+	 * other IO completions here to update the EOF. Failing to serialise
+	 * here can result in EOF moving backwards and Bad Things Happen when
+	 * that occurs.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (offset + size > i_size_read(inode)) {
+		i_size_write(inode, offset + size);
+		update_size = true;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
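+	/*
+	 * For a write that went through the COW fork, remap the blocks we
+	 * just wrote into the data fork before touching unwritten state.
+	 */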
+	if (flags & IOMAP_DIO_COW) {
+		error = xfs_reflink_end_cow(ip, offset, size);
+		if (error)
+			return error;
+	}
+
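+	/*
+	 * Unwritten extents must be converted to written now that the data
+	 * is on disk; a plain size-extending write only needs the on-disk
+	 * inode size updated.
+	 */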
+	if (flags & IOMAP_DIO_UNWRITTEN)
+		error = xfs_iomap_write_unwritten(ip, offset, size);
+	else if (update_size)
+		error = xfs_setfilesize(ip, offset, size);
+
+	return error;
+}
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -535,9 +515,7 @@ xfs_file_dio_aio_write(
 	int			unaligned_io = 0;
 	int			iolock;
 	size_t			count = iov_iter_count(from);
-	loff_t			end;
-	struct iov_iter		data;
-	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
+	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
@@ -559,29 +537,12 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 	count = iov_iter_count(from);
-	end = iocb->ki_pos + count - 1;
-
-	if (mapping->nrpages) {
-		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
-		if (ret)
-			goto out;
-
-		/*
-		 * Invalidate whole pages. This can return an error if we fail
-		 * to invalidate a page, but this should never happen on XFS.
-		 * Warn if it does fail.
-		 */
-		ret = invalidate_inode_pages2_range(mapping,
-				iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-		ret = 0;
-	}
 
 	/*
 	 * If we are doing unaligned IO, wait for all other IO to drain,
@@ -591,7 +552,7 @@ xfs_file_dio_aio_write(
 	if (unaligned_io)
 		inode_dio_wait(inode);
 	else if (iolock == XFS_IOLOCK_EXCL) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
@@ -604,24 +565,9 @@ xfs_file_dio_aio_write(
 		goto out;
 	}
 
-	data = *from;
-	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-			xfs_get_blocks_direct, xfs_end_io_direct_write,
-			NULL, DIO_ASYNC_EXTEND);
-
-	/* see generic_file_direct_write() for why this is necessary */
-	if (mapping->nrpages) {
-		invalidate_inode_pages2_range(mapping,
-					      iocb->ki_pos >> PAGE_SHIFT,
-					      end >> PAGE_SHIFT);
-	}
-
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
-	}
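+	/*
+	 * The pagecache flush and invalidation now happen inside
+	 * iomap_dio_rw(), and xfs_dio_write_end_io() runs at I/O completion
+	 * to update the inode size and convert COW / unwritten extents.
+	 */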
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 
 	/*
 	 * No fallback to buffered IO on errors for XFS, direct IO will either
@@ -643,7 +589,7 @@ xfs_file_dax_write(
 	size_t			count;
 	loff_t			pos;
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -652,15 +598,13 @@ xfs_file_dax_write(
 	pos = iocb->ki_pos;
 	count = iov_iter_count(from);
 
 	trace_xfs_file_dax_write(ip, count, pos);
-
 	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 		i_size_write(inode, iocb->ki_pos);
 		error = xfs_setfilesize(ip, pos, ret);
 	}
-
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 	return error ? error : ret;
 }
@@ -677,7 +621,7 @@ xfs_file_buffered_aio_write(
 	int			enospc = 0;
 	int			iolock = XFS_IOLOCK_EXCL;
 
-	xfs_rw_ilock(ip, iolock);
+	xfs_ilock(ip, iolock);
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
@@ -721,7 +665,7 @@ write_retry:
 
 	current->backing_dev_info = NULL;
 out:
-	xfs_rw_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
 	return ret;
 }
 
@@ -797,7 +741,7 @@ xfs_file_fallocate(
 		return -EOPNOTSUPP;
 
 	xfs_ilock(ip, iolock);
-	error = xfs_break_layouts(inode, &iolock, false);
+	error = xfs_break_layouts(inode, &iolock);
 	if (error)
 		goto out_unlock;
 
@@ -1501,15 +1445,9 @@ xfs_filemap_fault(
 		return xfs_filemap_page_mkwrite(vma, vmf);
 
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-	if (IS_DAX(inode)) {
-		/*
-		 * we do not want to trigger unwritten extent conversion on read
-		 * faults - that is unnecessary overhead and would also require
-		 * changes to xfs_get_blocks_direct() to map unwritten extent
-		 * ioend for conversion on read-only mappings.
-		 */
+	if (IS_DAX(inode))
 		ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
-	} else
+	else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 