|
@@ -797,18 +797,16 @@ int ext4_dio_get_block(struct inode *inode, sector_t iblock,
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Get block function for DIO writes when we create unwritten extent if
|
|
|
|
|
|
+ * Get block function for AIO DIO writes when we create unwritten extent if
|
|
* blocks are not allocated yet. The extent will be converted to written
|
|
* blocks are not allocated yet. The extent will be converted to written
|
|
* after IO is complete.
|
|
* after IO is complete.
|
|
*/
|
|
*/
|
|
-static int ext4_dio_get_block_unwritten(struct inode *inode, sector_t iblock,
|
|
|
|
- struct buffer_head *bh_result, int create)
|
|
|
|
|
|
+static int ext4_dio_get_block_unwritten_async(struct inode *inode,
|
|
|
|
+ sector_t iblock, struct buffer_head *bh_result, int create)
|
|
{
|
|
{
|
|
handle_t *handle;
|
|
handle_t *handle;
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
- ext4_debug("ext4_dio_get_block_unwritten: inode %lu, create flag %d\n",
|
|
|
|
- inode->i_ino, create);
|
|
|
|
/* We don't expect handle for direct IO */
|
|
/* We don't expect handle for direct IO */
|
|
WARN_ON_ONCE(ext4_journal_current_handle());
|
|
WARN_ON_ONCE(ext4_journal_current_handle());
|
|
|
|
|
|
@@ -818,16 +816,62 @@ static int ext4_dio_get_block_unwritten(struct inode *inode, sector_t iblock,
|
|
ret = _ext4_get_block(inode, iblock, bh_result,
|
|
ret = _ext4_get_block(inode, iblock, bh_result,
|
|
EXT4_GET_BLOCKS_IO_CREATE_EXT);
|
|
EXT4_GET_BLOCKS_IO_CREATE_EXT);
|
|
ext4_journal_stop(handle);
|
|
ext4_journal_stop(handle);
|
|
- if (!ret && buffer_unwritten(bh_result)) {
|
|
|
|
- ext4_io_end_t *io_end = ext4_inode_aio(inode);
|
|
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * When doing DIO using unwritten extents, we need io_end to convert
|
|
|
|
+ * unwritten extents to written on IO completion. We allocate io_end
|
|
|
|
+ * once we spot unwritten extent and store it in b_private. Generic
|
|
|
|
+ * DIO code keeps b_private set and furthermore passes the value to
|
|
|
|
+ * our completion callback in 'private' argument.
|
|
|
|
+ */
|
|
|
|
+ if (!ret && buffer_unwritten(bh_result)) {
|
|
|
|
+ if (!bh_result->b_private) {
|
|
|
|
+ ext4_io_end_t *io_end;
|
|
|
|
+
|
|
|
|
+ io_end = ext4_init_io_end(inode, GFP_KERNEL);
|
|
|
|
+ if (!io_end)
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+ bh_result->b_private = io_end;
|
|
|
|
+ ext4_set_io_unwritten_flag(inode, io_end);
|
|
|
|
+ }
|
|
set_buffer_defer_completion(bh_result);
|
|
set_buffer_defer_completion(bh_result);
|
|
- WARN_ON_ONCE(io_end && !(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
|
|
|
}
|
|
}
|
|
|
|
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Get block function for non-AIO DIO writes when we create unwritten extent if
|
|
|
|
+ * blocks are not allocated yet. The extent will be converted to written
|
|
|
|
+ * after IO is complete from ext4_ext_direct_IO() function.
|
|
|
|
+ */
|
|
|
|
+static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
|
|
|
|
+ sector_t iblock, struct buffer_head *bh_result, int create)
|
|
|
|
+{
|
|
|
|
+ handle_t *handle;
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ /* We don't expect handle for direct IO */
|
|
|
|
+ WARN_ON_ONCE(ext4_journal_current_handle());
|
|
|
|
+
|
|
|
|
+ handle = start_dio_trans(inode, bh_result);
|
|
|
|
+ if (IS_ERR(handle))
|
|
|
|
+ return PTR_ERR(handle);
|
|
|
|
+ ret = _ext4_get_block(inode, iblock, bh_result,
|
|
|
|
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
|
|
|
|
+ ext4_journal_stop(handle);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Mark inode as having pending DIO writes to unwritten extents.
|
|
|
|
+ * ext4_ext_direct_IO() checks this flag and converts extents to
|
|
|
|
+ * written.
|
|
|
|
+ */
|
|
|
|
+ if (!ret && buffer_unwritten(bh_result))
|
|
|
|
+ ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
|
|
static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
|
|
struct buffer_head *bh_result, int create)
|
|
struct buffer_head *bh_result, int create)
|
|
{
|
|
{
|
|
@@ -3243,7 +3287,7 @@ out:
|
|
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
|
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
|
ssize_t size, void *private)
|
|
ssize_t size, void *private)
|
|
{
|
|
{
|
|
- ext4_io_end_t *io_end = iocb->private;
|
|
|
|
|
|
+ ext4_io_end_t *io_end = private;
|
|
|
|
|
|
/* if not async direct IO just return */
|
|
/* if not async direct IO just return */
|
|
if (!io_end)
|
|
if (!io_end)
|
|
@@ -3251,10 +3295,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
|
|
|
|
|
ext_debug("ext4_end_io_dio(): io_end 0x%p "
|
|
ext_debug("ext4_end_io_dio(): io_end 0x%p "
|
|
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
|
|
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
|
|
- iocb->private, io_end->inode->i_ino, iocb, offset,
|
|
|
|
- size);
|
|
|
|
|
|
+ io_end, io_end->inode->i_ino, iocb, offset, size);
|
|
|
|
|
|
- iocb->private = NULL;
|
|
|
|
io_end->offset = offset;
|
|
io_end->offset = offset;
|
|
io_end->size = size;
|
|
io_end->size = size;
|
|
ext4_put_io_end(io_end);
|
|
ext4_put_io_end(io_end);
|
|
@@ -3290,7 +3332,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|
get_block_t *get_block_func = NULL;
|
|
get_block_t *get_block_func = NULL;
|
|
int dio_flags = 0;
|
|
int dio_flags = 0;
|
|
loff_t final_size = offset + count;
|
|
loff_t final_size = offset + count;
|
|
- ext4_io_end_t *io_end = NULL;
|
|
|
|
|
|
|
|
/* Use the old path for reads and writes beyond i_size. */
|
|
/* Use the old path for reads and writes beyond i_size. */
|
|
if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
|
|
if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
|
|
@@ -3315,16 +3356,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|
/*
|
|
/*
|
|
* We could direct write to holes and fallocate.
|
|
* We could direct write to holes and fallocate.
|
|
*
|
|
*
|
|
- * Allocated blocks to fill the hole are marked as
|
|
|
|
- * unwritten to prevent parallel buffered read to expose
|
|
|
|
- * the stale data before DIO complete the data IO.
|
|
|
|
|
|
+ * Allocated blocks to fill the hole are marked as unwritten to prevent
|
|
|
|
+ * parallel buffered read to expose the stale data before DIO complete
|
|
|
|
+ * the data IO.
|
|
*
|
|
*
|
|
- * As to previously fallocated extents, ext4 get_block will
|
|
|
|
- * just simply mark the buffer mapped but still keep the
|
|
|
|
- * extents unwritten.
|
|
|
|
|
|
+ * As to previously fallocated extents, ext4 get_block will just simply
|
|
|
|
+ * mark the buffer mapped but still keep the extents unwritten.
|
|
*
|
|
*
|
|
- * For non AIO case, we will convert those unwritten extents
|
|
|
|
- * to written after return back from blockdev_direct_IO.
|
|
|
|
|
|
+ * For non AIO case, we will convert those unwritten extents to written
|
|
|
|
+ * after return back from blockdev_direct_IO. That way we save us from
|
|
|
|
+ * allocating io_end structure and also the overhead of offloading
|
|
|
|
+ * the extent convertion to a workqueue.
|
|
*
|
|
*
|
|
* For async DIO, the conversion needs to be deferred when the
|
|
* For async DIO, the conversion needs to be deferred when the
|
|
* IO is completed. The ext4 end_io callback function will be
|
|
* IO is completed. The ext4 end_io callback function will be
|
|
@@ -3332,30 +3374,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|
* case, we allocate an io_end structure to hook to the iocb.
|
|
* case, we allocate an io_end structure to hook to the iocb.
|
|
*/
|
|
*/
|
|
iocb->private = NULL;
|
|
iocb->private = NULL;
|
|
- if (overwrite) {
|
|
|
|
|
|
+ if (overwrite)
|
|
get_block_func = ext4_dio_get_block_overwrite;
|
|
get_block_func = ext4_dio_get_block_overwrite;
|
|
|
|
+ else if (is_sync_kiocb(iocb)) {
|
|
|
|
+ get_block_func = ext4_dio_get_block_unwritten_sync;
|
|
|
|
+ dio_flags = DIO_LOCKING;
|
|
} else {
|
|
} else {
|
|
- ext4_inode_aio_set(inode, NULL);
|
|
|
|
- if (!is_sync_kiocb(iocb)) {
|
|
|
|
- io_end = ext4_init_io_end(inode, GFP_NOFS);
|
|
|
|
- if (!io_end) {
|
|
|
|
- ret = -ENOMEM;
|
|
|
|
- goto retake_lock;
|
|
|
|
- }
|
|
|
|
- /*
|
|
|
|
- * Grab reference for DIO. Will be dropped in
|
|
|
|
- * ext4_end_io_dio()
|
|
|
|
- */
|
|
|
|
- iocb->private = ext4_get_io_end(io_end);
|
|
|
|
- /*
|
|
|
|
- * we save the io structure for current async direct
|
|
|
|
- * IO, so that later ext4_map_blocks() could flag the
|
|
|
|
- * io structure whether there is a unwritten extents
|
|
|
|
- * needs to be converted when IO is completed.
|
|
|
|
- */
|
|
|
|
- ext4_inode_aio_set(inode, io_end);
|
|
|
|
- }
|
|
|
|
- get_block_func = ext4_dio_get_block_unwritten;
|
|
|
|
|
|
+ get_block_func = ext4_dio_get_block_unwritten_async;
|
|
dio_flags = DIO_LOCKING;
|
|
dio_flags = DIO_LOCKING;
|
|
}
|
|
}
|
|
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
|
#ifdef CONFIG_EXT4_FS_ENCRYPTION
|
|
@@ -3370,27 +3395,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|
get_block_func,
|
|
get_block_func,
|
|
ext4_end_io_dio, NULL, dio_flags);
|
|
ext4_end_io_dio, NULL, dio_flags);
|
|
|
|
|
|
- /*
|
|
|
|
- * Put our reference to io_end. This can free the io_end structure e.g.
|
|
|
|
- * in sync IO case or in case of error. It can even perform extent
|
|
|
|
- * conversion if all bios we submitted finished before we got here.
|
|
|
|
- * Note that in that case iocb->private can be already set to NULL
|
|
|
|
- * here.
|
|
|
|
- */
|
|
|
|
- if (io_end) {
|
|
|
|
- ext4_inode_aio_set(inode, NULL);
|
|
|
|
- ext4_put_io_end(io_end);
|
|
|
|
- /*
|
|
|
|
- * When no IO was submitted ext4_end_io_dio() was not
|
|
|
|
- * called so we have to put iocb's reference.
|
|
|
|
- */
|
|
|
|
- if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
|
|
|
|
- WARN_ON(iocb->private != io_end);
|
|
|
|
- WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
|
|
|
|
- ext4_put_io_end(io_end);
|
|
|
|
- iocb->private = NULL;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
|
|
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
|
|
EXT4_STATE_DIO_UNWRITTEN)) {
|
|
EXT4_STATE_DIO_UNWRITTEN)) {
|
|
int err;
|
|
int err;
|
|
@@ -3405,7 +3409,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
|
|
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
|
|
}
|
|
}
|
|
|
|
|
|
-retake_lock:
|
|
|
|
if (iov_iter_rw(iter) == WRITE)
|
|
if (iov_iter_rw(iter) == WRITE)
|
|
inode_dio_end(inode);
|
|
inode_dio_end(inode);
|
|
/* take i_mutex locking again if we do a ovewrite dio */
|
|
/* take i_mutex locking again if we do a ovewrite dio */
|