|
@@ -1234,15 +1234,19 @@ xfs_vm_releasepage(
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * When we map a DIO buffer, we need to attach an ioend that describes the type
|
|
|
|
- * of write IO we are doing. This passes to the completion function the
|
|
|
|
- * operations it needs to perform.
|
|
|
|
|
|
+ * When we map a DIO buffer, we may need to attach an ioend that describes the
|
|
|
|
+ * type of write IO we are doing. This passes to the completion function the
|
|
|
|
+ * operations it needs to perform. If the mapping is for an overwrite wholly
|
|
|
|
+ * within the EOF then we don't need an ioend and so we don't allocate one.
|
|
|
|
+ * This avoids the unnecessary overhead of allocating and freeing ioends for
|
|
|
|
+ * workloads that don't require transactions on IO completion.
|
|
*
|
|
*
|
|
* If we get multiple mappings in a single IO, we might be mapping different
|
|
* If we get multiple mappings in a single IO, we might be mapping different
|
|
* types. But because the direct IO can only have a single private pointer, we
|
|
* types. But because the direct IO can only have a single private pointer, we
|
|
* need to ensure that:
|
|
* need to ensure that:
|
|
*
|
|
*
|
|
- * a) the ioend spans the entire region of the IO; and
|
|
|
|
|
|
+ * a) i) the ioend spans the entire region of unwritten mappings; or
|
|
|
|
+ * ii) the ioend spans all the mappings that cross or are beyond EOF; and
|
|
* b) if it contains unwritten extents, it is *permanently* marked as such
|
|
* b) if it contains unwritten extents, it is *permanently* marked as such
|
|
*
|
|
*
|
|
* We could do this by chaining ioends like buffered IO does, but we only
|
|
* We could do this by chaining ioends like buffered IO does, but we only
|
|
@@ -1283,21 +1287,23 @@ xfs_map_direct(
|
|
trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
|
|
trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
|
|
ioend->io_size, ioend->io_type,
|
|
ioend->io_size, ioend->io_type,
|
|
imap);
|
|
imap);
|
|
- } else {
|
|
|
|
|
|
+ } else if (type == XFS_IO_UNWRITTEN ||
|
|
|
|
+ offset + size > i_size_read(inode)) {
|
|
ioend = xfs_alloc_ioend(inode, type);
|
|
ioend = xfs_alloc_ioend(inode, type);
|
|
ioend->io_offset = offset;
|
|
ioend->io_offset = offset;
|
|
ioend->io_size = size;
|
|
ioend->io_size = size;
|
|
|
|
+
|
|
bh_result->b_private = ioend;
|
|
bh_result->b_private = ioend;
|
|
|
|
+ set_buffer_defer_completion(bh_result);
|
|
|
|
|
|
trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
|
|
trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
|
|
imap);
|
|
imap);
|
|
|
|
+ } else {
|
|
|
|
+ trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
|
|
|
|
+ imap);
|
|
}
|
|
}
|
|
-
|
|
|
|
- if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
|
|
|
|
- set_buffer_defer_completion(bh_result);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
|
|
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
|
|
* is, so that we can avoid repeated get_blocks calls.
|
|
* is, so that we can avoid repeated get_blocks calls.
|
|
@@ -1519,9 +1525,11 @@ xfs_get_blocks_direct(
|
|
/*
|
|
/*
|
|
* Complete a direct I/O write request.
|
|
* Complete a direct I/O write request.
|
|
*
|
|
*
|
|
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
|
|
|
|
- * need to issue a transaction to convert the range from unwritten to written
|
|
|
|
- * extents.
|
|
|
|
|
|
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
|
|
|
|
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
|
|
|
|
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
|
|
|
|
+ * case the completion can be called in interrupt context, whereas if we have an
|
|
|
|
+ * ioend we will always be called in task context (i.e. from a workqueue).
|
|
*/
|
|
*/
|
|
STATIC void
|
|
STATIC void
|
|
xfs_end_io_direct_write(
|
|
xfs_end_io_direct_write(
|
|
@@ -1535,7 +1543,13 @@ xfs_end_io_direct_write(
|
|
struct xfs_mount *mp = ip->i_mount;
|
|
struct xfs_mount *mp = ip->i_mount;
|
|
struct xfs_ioend *ioend = private;
|
|
struct xfs_ioend *ioend = private;
|
|
|
|
|
|
- trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL);
|
|
|
|
|
|
+ trace_xfs_gbmap_direct_endio(ip, offset, size,
|
|
|
|
+ ioend ? ioend->io_type : 0, NULL);
|
|
|
|
+
|
|
|
|
+ if (!ioend) {
|
|
|
|
+ ASSERT(offset + size <= i_size_read(inode));
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
|
|
if (XFS_FORCED_SHUTDOWN(mp))
|
|
if (XFS_FORCED_SHUTDOWN(mp))
|
|
goto out_end_io;
|
|
goto out_end_io;
|
|
@@ -1548,12 +1562,12 @@ xfs_end_io_direct_write(
|
|
|
|
|
|
/*
|
|
/*
|
|
* The ioend only maps whole blocks, while the IO may be sector aligned.
|
|
* The ioend only maps whole blocks, while the IO may be sector aligned.
|
|
- * Hence the ioend offset/size may not match the IO offset/size exactly,
|
|
|
|
- * but should span it completely. Write the IO sizes into the ioend so
|
|
|
|
- * that completion processing does the right thing.
|
|
|
|
|
|
+ * Hence the ioend offset/size may not match the IO offset/size exactly.
|
|
|
|
+ * Because we don't map overwrites within EOF into the ioend, the offset
|
|
|
|
+ * may not match, but only if the endio spans EOF. Either way, write
|
|
|
|
+ * the IO sizes into the ioend so that completion processing does the
|
|
|
|
+ * right thing.
|
|
*/
|
|
*/
|
|
- ASSERT(size <= ioend->io_size);
|
|
|
|
- ASSERT(offset >= ioend->io_offset);
|
|
|
|
ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
|
|
ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
|
|
ioend->io_size = size;
|
|
ioend->io_size = size;
|
|
ioend->io_offset = offset;
|
|
ioend->io_offset = offset;
|
|
@@ -1562,20 +1576,15 @@ xfs_end_io_direct_write(
|
|
* The ioend tells us whether we are doing unwritten extent conversion
|
|
* The ioend tells us whether we are doing unwritten extent conversion
|
|
* or an append transaction that updates the on-disk file size. These
|
|
* or an append transaction that updates the on-disk file size. These
|
|
* cases are the only cases where we should *potentially* be needing
|
|
* cases are the only cases where we should *potentially* be needing
|
|
- * to update the VFS inode size. When the ioend indicates this, we
|
|
|
|
- * are *guaranteed* to be running in non-interrupt context.
|
|
|
|
|
|
+ * to update the VFS inode size.
|
|
*
|
|
*
|
|
* We need to update the in-core inode size here so that we don't end up
|
|
* We need to update the in-core inode size here so that we don't end up
|
|
- * with the on-disk inode size being outside the in-core inode size.
|
|
|
|
- * While we can do this in the process context after the IO has
|
|
|
|
- * completed, this does not work for AIO and hence we always update
|
|
|
|
- * the in-core inode size here if necessary.
|
|
|
|
|
|
+ * with the on-disk inode size being outside the in-core inode size. We
|
|
|
|
+ * have no other method of updating EOF for AIO, so always do it here
|
|
|
|
+ * if necessary.
|
|
*/
|
|
*/
|
|
- if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) {
|
|
|
|
- if (offset + size > i_size_read(inode))
|
|
|
|
- i_size_write(inode, offset + size);
|
|
|
|
- } else
|
|
|
|
- ASSERT(offset + size <= i_size_read(inode));
|
|
|
|
|
|
+ if (offset + size > i_size_read(inode))
|
|
|
|
+ i_size_write(inode, offset + size);
|
|
|
|
|
|
/*
|
|
/*
|
|
* If we are doing an append IO that needs to update the EOF on disk,
|
|
* If we are doing an append IO that needs to update the EOF on disk,
|
|
@@ -1584,7 +1593,7 @@ xfs_end_io_direct_write(
|
|
* result in the ioend processing passing on the error if it is
|
|
* result in the ioend processing passing on the error if it is
|
|
* possible as we can't return it from here.
|
|
* possible as we can't return it from here.
|
|
*/
|
|
*/
|
|
- if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend))
|
|
|
|
|
|
+ if (ioend->io_type == XFS_IO_OVERWRITE)
|
|
ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
|
|
ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
|
|
|
|
|
|
out_end_io:
|
|
out_end_io:
|