|
@@ -151,6 +151,38 @@ struct kioctx {
|
|
|
unsigned id;
|
|
|
};
|
|
|
|
|
|
+/*
|
|
|
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
|
|
|
+ * cancelled or completed (this makes a certain amount of sense because
|
|
|
+ * successful cancellation - io_cancel() - does deliver the completion to
|
|
|
+ * userspace).
|
|
|
+ *
|
|
|
+ * And since most things don't implement kiocb cancellation and we'd really like
|
|
|
+ * kiocb completion to be lockless when possible, we use ki_cancel to
|
|
|
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
|
|
|
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
|
|
|
+ */
|
|
|
+#define KIOCB_CANCELLED ((void *) (~0ULL))
|
|
|
+
|
|
|
+struct aio_kiocb { /* aio's per-request wrapper around a generic struct kiocb */
|
|
|
+ struct kiocb common; /* generic kiocb; container_of() on this member recovers the aio_kiocb */
|
|
|
+
|
|
|
+ struct kioctx *ki_ctx; /* associated kioctx, read by kiocb_set_cancel_fn() and aio_complete() */
|
|
|
+ kiocb_cancel_fn *ki_cancel; /* cancel callback; kiocb_cancel() swaps in KIOCB_CANCELLED via cmpxchg() before calling it */
|
|
|
+
|
|
|
+ struct iocb __user *ki_user_iocb; /* user's aiocb */
|
|
|
+ __u64 ki_user_data; /* user's data for completion */
|
|
|
+
|
|
|
+ struct list_head ki_list; /* the aio core uses this
|
|
|
+ * for cancellation */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If the aio_resfd field of the userspace iocb is not zero,
|
|
|
+ * this is the underlying eventfd context to deliver events to.
|
|
|
+ */
|
|
|
+ struct eventfd_ctx *ki_eventfd;
|
|
|
+};
|
|
|
+
|
|
|
/*------ sysctl variables----*/
|
|
|
static DEFINE_SPINLOCK(aio_nr_lock);
|
|
|
unsigned long aio_nr; /* current system wide number of aio requests */
|
|
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
|
|
|
if (IS_ERR(aio_mnt))
|
|
|
panic("Failed to create aio fs mount.");
|
|
|
|
|
|
- kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
|
|
|
+ kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
|
|
|
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
|
|
|
|
|
|
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
|
|
@@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx)
|
|
|
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
|
|
|
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
|
|
|
|
|
|
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
|
|
|
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
|
|
|
{
|
|
|
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
|
|
|
struct kioctx *ctx = req->ki_ctx;
|
|
|
unsigned long flags;
|
|
|
|
|
@@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
|
|
|
}
|
|
|
EXPORT_SYMBOL(kiocb_set_cancel_fn);
|
|
|
|
|
|
-static int kiocb_cancel(struct kiocb *kiocb)
|
|
|
+static int kiocb_cancel(struct aio_kiocb *kiocb)
|
|
|
{
|
|
|
kiocb_cancel_fn *old, *cancel;
|
|
|
|
|
@@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
|
|
|
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
|
|
|
} while (cancel != old);
|
|
|
|
|
|
- return cancel(kiocb);
|
|
|
+ return cancel(&kiocb->common);
|
|
|
}
|
|
|
|
|
|
static void free_ioctx(struct work_struct *work)
|
|
@@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
|
|
|
static void free_ioctx_users(struct percpu_ref *ref)
|
|
|
{
|
|
|
struct kioctx *ctx = container_of(ref, struct kioctx, users);
|
|
|
- struct kiocb *req;
|
|
|
+ struct aio_kiocb *req;
|
|
|
|
|
|
spin_lock_irq(&ctx->ctx_lock);
|
|
|
|
|
|
while (!list_empty(&ctx->active_reqs)) {
|
|
|
req = list_first_entry(&ctx->active_reqs,
|
|
|
- struct kiocb, ki_list);
|
|
|
+ struct aio_kiocb, ki_list);
|
|
|
|
|
|
list_del_init(&req->ki_list);
|
|
|
kiocb_cancel(req);
|
|
@@ -932,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
|
|
|
* Allocate a slot for an aio request.
|
|
|
* Returns NULL if no requests are free.
|
|
|
*/
|
|
|
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
|
|
|
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
|
|
|
{
|
|
|
- struct kiocb *req;
|
|
|
+ struct aio_kiocb *req;
|
|
|
|
|
|
if (!get_reqs_available(ctx)) {
|
|
|
user_refill_reqs_available(ctx);
|
|
@@ -955,10 +988,10 @@ out_put:
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
-static void kiocb_free(struct kiocb *req)
|
|
|
+static void kiocb_free(struct aio_kiocb *req)
|
|
|
{
|
|
|
- if (req->ki_filp)
|
|
|
- fput(req->ki_filp);
|
|
|
+ if (req->common.ki_filp)
|
|
|
+ fput(req->common.ki_filp);
|
|
|
if (req->ki_eventfd != NULL)
|
|
|
eventfd_ctx_put(req->ki_eventfd);
|
|
|
kmem_cache_free(kiocb_cachep, req);
|
|
@@ -994,8 +1027,9 @@ out:
|
|
|
/* aio_complete
|
|
|
* Called when the io request on the given iocb is complete.
|
|
|
*/
|
|
|
-void aio_complete(struct kiocb *iocb, long res, long res2)
|
|
|
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
|
|
|
{
|
|
|
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
|
|
|
struct kioctx *ctx = iocb->ki_ctx;
|
|
|
struct aio_ring *ring;
|
|
|
struct io_event *ev_page, *event;
|
|
@@ -1009,7 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
|
|
|
* ref, no other paths have a way to get another ref
|
|
|
* - the sync task helpfully left a reference to itself in the iocb
|
|
|
*/
|
|
|
- BUG_ON(is_sync_kiocb(iocb));
|
|
|
+ BUG_ON(is_sync_kiocb(kiocb));
|
|
|
|
|
|
if (iocb->ki_list.next) {
|
|
|
unsigned long flags;
|
|
@@ -1035,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
|
|
|
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
|
|
|
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
|
|
|
|
|
|
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
|
|
|
+ event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
|
|
|
event->data = iocb->ki_user_data;
|
|
|
event->res = res;
|
|
|
event->res2 = res2;
|
|
@@ -1044,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
|
|
|
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
|
|
|
|
|
|
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
|
|
|
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
|
|
|
+ ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
|
|
|
res, res2);
|
|
|
|
|
|
/* after flagging the request as done, we
|
|
@@ -1091,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
|
|
|
|
|
|
percpu_ref_put(&ctx->reqs);
|
|
|
}
|
|
|
-EXPORT_SYMBOL(aio_complete);
|
|
|
|
|
|
/* aio_read_events_ring
|
|
|
* Pull an event off of the ioctx's event ring. Returns the number of
|
|
@@ -1480,7 +1513,7 @@ rw_common:
|
|
|
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
|
|
|
struct iocb *iocb, bool compat)
|
|
|
{
|
|
|
- struct kiocb *req;
|
|
|
+ struct aio_kiocb *req;
|
|
|
ssize_t ret;
|
|
|
|
|
|
/* enforce forwards compatibility on users */
|
|
@@ -1503,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
|
|
|
if (unlikely(!req))
|
|
|
return -EAGAIN;
|
|
|
|
|
|
- req->ki_filp = fget(iocb->aio_fildes);
|
|
|
- if (unlikely(!req->ki_filp)) {
|
|
|
+ req->common.ki_filp = fget(iocb->aio_fildes);
|
|
|
+ if (unlikely(!req->common.ki_filp)) {
|
|
|
ret = -EBADF;
|
|
|
goto out_put_req;
|
|
|
}
|
|
|
+ req->common.ki_pos = iocb->aio_offset;
|
|
|
+ req->common.ki_complete = aio_complete;
|
|
|
+ req->common.ki_flags = 0;
|
|
|
|
|
|
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
|
|
|
/*
|
|
@@ -1522,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
|
|
|
req->ki_eventfd = NULL;
|
|
|
goto out_put_req;
|
|
|
}
|
|
|
+
|
|
|
+ req->common.ki_flags |= IOCB_EVENTFD;
|
|
|
}
|
|
|
|
|
|
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
|
|
@@ -1530,11 +1568,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
|
|
|
goto out_put_req;
|
|
|
}
|
|
|
|
|
|
- req->ki_obj.user = user_iocb;
|
|
|
+ req->ki_user_iocb = user_iocb;
|
|
|
req->ki_user_data = iocb->aio_data;
|
|
|
- req->ki_pos = iocb->aio_offset;
|
|
|
|
|
|
- ret = aio_run_iocb(req, iocb->aio_lio_opcode,
|
|
|
+ ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
|
|
|
(char __user *)(unsigned long)iocb->aio_buf,
|
|
|
iocb->aio_nbytes,
|
|
|
compat);
|
|
@@ -1623,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
|
|
|
/* lookup_kiocb
|
|
|
* Finds a given iocb for cancellation.
|
|
|
*/
|
|
|
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
|
|
|
- u32 key)
|
|
|
+static struct aio_kiocb *
|
|
|
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
|
|
|
{
|
|
|
- struct list_head *pos;
|
|
|
+ struct aio_kiocb *kiocb;
|
|
|
|
|
|
assert_spin_locked(&ctx->ctx_lock);
|
|
|
|
|
@@ -1634,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
|
|
|
return NULL;
|
|
|
|
|
|
/* TODO: use a hash or array, this sucks. */
|
|
|
- list_for_each(pos, &ctx->active_reqs) {
|
|
|
- struct kiocb *kiocb = list_kiocb(pos);
|
|
|
- if (kiocb->ki_obj.user == iocb)
|
|
|
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
|
|
|
+ if (kiocb->ki_user_iocb == iocb)
|
|
|
return kiocb;
|
|
|
}
|
|
|
return NULL;
|
|
@@ -1656,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
|
|
|
struct io_event __user *, result)
|
|
|
{
|
|
|
struct kioctx *ctx;
|
|
|
- struct kiocb *kiocb;
|
|
|
+ struct aio_kiocb *kiocb;
|
|
|
u32 key;
|
|
|
int ret;
|
|
|
|