@@ -1259,13 +1259,28 @@ xfs_vm_releasepage(
  * the DIO. There is only going to be one reference to the ioend and its life
  * cycle is constrained by the DIO completion code. hence we don't need
  * reference counting here.
+ *
+ * Note that for DIO, an IO to the highest supported file block offset (i.e.
+ * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
+ * bit variable. Hence if we see this overflow, we have to assume that the IO is
+ * extending the file size. We won't know for sure until IO completion is run
+ * and the actual max write offset is communicated to the IO completion
+ * routine.
+ *
+ * For DAX page faults, we are preparing to never see unwritten extents here,
+ * nor should we ever extend the inode size. Hence we will soon have nothing to
+ * do here for this case, ensuring we don't have to provide an IO completion
+ * callback to free an ioend that we don't actually need for a fault into the
+ * page at offset (2^63 - 1FSB) bytes.
  */
+
 static void
 xfs_map_direct(
 	struct inode		*inode,
 	struct buffer_head	*bh_result,
 	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
+	xfs_off_t		offset,
+	bool			dax_fault)
 {
 	struct xfs_ioend	*ioend;
 	xfs_off_t		size = bh_result->b_size;
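The overflow the new comment block describes is concrete enough to demonstrate: for an IO ending exactly at the 2^63 byte boundary, offset + size wraps negative, which is what the "offset + size < 0" test added two hunks below catches. A minimal userspace sketch of the arithmetic (an illustration only, not kernel code; the addition is done in unsigned so the wraparound is well-defined, whereas the kernel relies on -fno-strict-overflow):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t fsb    = 4096;			/* assume a 4k filesystem block */
	int64_t offset = INT64_MAX - fsb + 1;	/* last mappable block: 2^63 - 1FSB */
	int64_t size   = fsb;			/* one block of IO */

	/* compute in unsigned, then reinterpret, to model the kernel's wrap */
	int64_t end = (int64_t)((uint64_t)offset + (uint64_t)size);

	if (end < 0)
		printf("offset + size wrapped negative: assume size-extending IO\n");
	return 0;
}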
@@ -1278,6 +1293,13 @@ xfs_map_direct(
 
 	trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
 
+	if (dax_fault) {
+		ASSERT(type == XFS_IO_OVERWRITE);
+		trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+					    imap);
+		return;
+	}
+
 	if (bh_result->b_private) {
 		ioend = bh_result->b_private;
 		ASSERT(ioend->io_size > 0);
@@ -1292,7 +1314,8 @@ xfs_map_direct(
 					      ioend->io_size, ioend->io_type,
 					      imap);
 	} else if (type == XFS_IO_UNWRITTEN ||
-		   offset + size > i_size_read(inode)) {
+		   offset + size > i_size_read(inode) ||
+		   offset + size < 0) {
 		ioend = xfs_alloc_ioend(inode, type);
 		ioend->io_offset = offset;
 		ioend->io_size = size;
@@ -1354,7 +1377,8 @@ __xfs_get_blocks(
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create,
-	bool			direct)
+	bool			direct,
+	bool			dax_fault)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1402,10 +1426,12 @@ __xfs_get_blocks(
 	if (error)
 		goto out_unlock;
 
+	/* for DAX, we convert unwritten extents directly */
 	if (create &&
 	    (!nimaps ||
 	     (imap.br_startblock == HOLESTARTBLOCK ||
-	      imap.br_startblock == DELAYSTARTBLOCK))) {
+	      imap.br_startblock == DELAYSTARTBLOCK) ||
+	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
 		if (direct || xfs_get_extsz_hint(ip)) {
 			/*
 			 * xfs_iomap_write_direct() expects the shared lock. It
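The hunk above widens the "needs allocation" test: DAX has to convert unwritten extents here, at block-mapping time, because there is no IO completion left to do the conversion later. A simplified, standalone sketch of the resulting decision (hypothetical names and a plain enum, purely illustrative of the predicate, not the real XFS types):

#include <stdbool.h>

enum extent_state { HOLE, DELALLOC, UNWRITTEN, WRITTEN };

/* simplified mirror of the create-path test in __xfs_get_blocks */
static bool needs_allocation(bool create, bool is_dax, enum extent_state state)
{
	if (!create)
		return false;
	/* holes and delalloc extents always need real blocks allocated */
	if (state == HOLE || state == DELALLOC)
		return true;
	/* DAX must convert unwritten extents now; other paths convert at IO completion */
	return is_dax && state == UNWRITTEN;
}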
@@ -1450,6 +1476,12 @@ __xfs_get_blocks(
 		goto out_unlock;
 	}
 
+	if (IS_DAX(inode) && create) {
+		ASSERT(!ISUNWRITTEN(&imap));
+		/* zeroing is not needed at a higher layer */
+		new = 0;
+	}
+
 	/* trim mapping down to size requested */
 	if (direct || size > (1 << inode->i_blkbits))
 		xfs_map_trim_size(inode, iblock, bh_result,
@@ -1467,7 +1499,8 @@ __xfs_get_blocks(
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
 		if (create && direct)
-			xfs_map_direct(inode, bh_result, &imap, offset);
+			xfs_map_direct(inode, bh_result, &imap, offset,
+				       dax_fault);
 	}
 
 	/*
@@ -1514,7 +1547,7 @@ xfs_get_blocks(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
 }
 
 int
@@ -1524,7 +1557,17 @@ xfs_get_blocks_direct(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
+}
+
+int
+xfs_get_blocks_dax_fault(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
 }
 
 static void
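For context on how the new wrapper is meant to be consumed: the fault path can now request a block mapping with no IO completion callback at all. A hypothetical caller sketch, assuming the __dax_fault() interface of this era (the wiring below is an illustration, not part of this patch):

/* hypothetical sketch: wiring the new helper into the DAX fault path */
static int
xfs_filemap_fault_sketch(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	/*
	 * DAX faults never see unwritten extents and never extend i_size,
	 * so no completion callback is passed (NULL instead of a
	 * dax_iodone_t handler such as the xfs_end_io_dax_write removed
	 * below).
	 */
	return __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
}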
@@ -1623,45 +1666,6 @@ xfs_end_io_direct_write(
 	__xfs_end_io_direct_write(inode, ioend, offset, size);
 }
 
-/*
- * For DAX we need a mapping buffer callback for unwritten extent conversion
- * when page faults allocate blocks and then zero them. Note that in this
- * case the mapping indicated by the ioend may extend beyond EOF. We most
- * definitely do not want to extend EOF here, so we trim back the ioend size to
- * EOF.
- */
-#ifdef CONFIG_FS_DAX
-void
-xfs_end_io_dax_write(
-	struct buffer_head	*bh,
-	int			uptodate)
-{
-	struct xfs_ioend	*ioend = bh->b_private;
-	struct inode		*inode = ioend->io_inode;
-	ssize_t			size = ioend->io_size;
-
-	ASSERT(IS_DAX(ioend->io_inode));
-
-	/* if there was an error zeroing, then don't convert it */
-	if (!uptodate)
-		ioend->io_error = -EIO;
-
-	/*
-	 * Trim update to EOF, so we don't extend EOF during unwritten extent
-	 * conversion of partial EOF blocks.
-	 */
-	spin_lock(&XFS_I(inode)->i_flags_lock);
-	if (ioend->io_offset + size > i_size_read(inode))
-		size = i_size_read(inode) - ioend->io_offset;
-	spin_unlock(&XFS_I(inode)->i_flags_lock);
-
-	__xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
-
-}
-#else
-void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
-#endif
-
 static inline ssize_t
 xfs_vm_do_dio(
 	struct inode		*inode,