@@ -685,6 +685,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_data);
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
+#define IOMAP_DIO_WRITE_FUA	(1 << 28)
 #define IOMAP_DIO_NEED_SYNC	(1 << 29)
 #define IOMAP_DIO_WRITE		(1 << 30)
 #define IOMAP_DIO_DIRTY		(1 << 31)
@@ -861,6 +862,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct iov_iter iter;
 	struct bio *bio;
 	bool need_zeroout = false;
+	bool use_fua = false;
 	int nr_pages, ret;
 	size_t copied = 0;
 
@@ -884,8 +886,20 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	case IOMAP_MAPPED:
 		if (iomap->flags & IOMAP_F_SHARED)
 			dio->flags |= IOMAP_DIO_COW;
-		if (iomap->flags & IOMAP_F_NEW)
+		if (iomap->flags & IOMAP_F_NEW) {
 			need_zeroout = true;
+		} else {
+			/*
+			 * Use a FUA write if we need datasync semantics, this
+			 * is a pure data IO that doesn't require any metadata
+			 * updates and the underlying device supports FUA. This
+			 * allows us to avoid cache flushes on IO completion.
+			 */
+			if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+			    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+			    blk_queue_fua(bdev_get_queue(iomap->bdev)))
+				use_fua = true;
+		}
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -933,10 +947,14 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 		n = bio->bi_iter.bi_size;
 		if (dio->flags & IOMAP_DIO_WRITE) {
-			bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+			bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+			if (use_fua)
+				bio->bi_opf |= REQ_FUA;
+			else
+				dio->flags &= ~IOMAP_DIO_WRITE_FUA;
 			task_io_account_write(n);
 		} else {
-			bio_set_op_attrs(bio, REQ_OP_READ, 0);
+			bio->bi_opf = REQ_OP_READ;
 			if (dio->flags & IOMAP_DIO_DIRTY)
 				bio_set_pages_dirty(bio);
 		}
@@ -966,7 +984,12 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 /*
  * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
- * is being issued as AIO or not.
+ * is being issued as AIO or not. This allows us to optimise pure data writes
+ * to use REQ_FUA rather than requiring generic_write_sync() to issue a
+ * REQ_FLUSH post write. This is slightly tricky because a single request here
+ * can be mapped into multiple disjoint IOs and only a subset of the IOs issued
+ * may be pure data writes. In that case, we still need to do a full data sync
+ * completion.
  */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
@@ -1012,10 +1035,21 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (iter->type == ITER_IOVEC)
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
+		flags |= IOMAP_WRITE;
 		dio->flags |= IOMAP_DIO_WRITE;
+
+		/* for data sync or sync, we need sync completion processing */
 		if (iocb->ki_flags & IOCB_DSYNC)
 			dio->flags |= IOMAP_DIO_NEED_SYNC;
-		flags |= IOMAP_WRITE;
+
+		/*
+		 * For datasync only writes, we optimistically try using FUA for
+		 * this IO. Any non-FUA write that occurs will clear this flag,
+		 * hence we know before completion whether a cache flush is
+		 * necessary.
+		 */
+		if ((iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) == IOCB_DSYNC)
+			dio->flags |= IOMAP_DIO_WRITE_FUA;
 	}
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -1071,6 +1105,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (ret < 0)
 		iomap_dio_set_error(dio, ret);
 
+	/*
+	 * If all the writes we issued were FUA, we don't need to flush the
+	 * cache on IO completion. Clear the sync flag for this case.
+	 */
+	if (dio->flags & IOMAP_DIO_WRITE_FUA)
+		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
+
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;
|