The upcoming aio poll support needs to be able to complete the iocb
inline from the cancellation context, but doing that while holding
ctx->ctx_lock would cause a lock order reversal.  Add support for
optionally calling the cancellation routine outside the context lock
to avoid this reversal.

Signed-off-by: Christoph Hellwig <h...@lst.de>
Acked-by: Jeff Moyer <jmo...@redhat.com>
---
 fs/aio.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)
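
Not part of the patch, just a sketch for reviewers: a hypothetical
fs/aio.c-internal user (e.g. the upcoming poll code) opting into delayed
cancel.  Only __kiocb_set_cancel_fn() and AIO_IOCB_DELAYED_CANCEL below are
introduced by this patch; aio_poll_cancel(), the call site and the exact
kiocb_cancel_fn signature are assumptions made for illustration.

/*
 * Hypothetical cancel callback: because the iocb is registered with
 * AIO_IOCB_DELAYED_CANCEL, this is invoked after ctx->ctx_lock has been
 * dropped, so it may complete the iocb inline without the lock order
 * reversal described above.
 */
static int aio_poll_cancel(struct kiocb *iocb)
{
	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);

	/* ... remove req from its waitqueue and complete it here ... */
	return 0;
}

	/* submission side, instead of plain kiocb_set_cancel_fn(): */
	__kiocb_set_cancel_fn(req, aio_poll_cancel, AIO_IOCB_DELAYED_CANCEL);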

diff --git a/fs/aio.c b/fs/aio.c
index 0b6394b4e528..9d7d6e4cde87 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -170,6 +170,10 @@ struct aio_kiocb {
        struct list_head        ki_list;        /* the aio core uses this
                                                 * for cancellation */
 
+       unsigned int            flags;          /* protected by ctx->ctx_lock */
+#define AIO_IOCB_DELAYED_CANCEL        (1 << 0)
+#define AIO_IOCB_CANCELLED     (1 << 1)
+
        /*
         * If the aio_resfd field of the userspace iocb is not zero,
         * this is the underlying eventfd context to deliver events to.
@@ -536,9 +540,9 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
 #define AIO_EVENTS_FIRST_PAGE  ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET      (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
+               kiocb_cancel_fn *cancel, unsigned int iocb_flags)
 {
-       struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
        struct kioctx *ctx = req->ki_ctx;
        unsigned long flags;
 
@@ -548,8 +552,15 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
        spin_lock_irqsave(&ctx->ctx_lock, flags);
        list_add_tail(&req->ki_list, &ctx->active_reqs);
        req->ki_cancel = cancel;
+       req->flags |= iocb_flags;
        spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
+
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
+{
+       return __kiocb_set_cancel_fn(container_of(iocb, struct aio_kiocb, rw),
+                       cancel, 0);
+}
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
 /*
@@ -603,17 +614,27 @@ static void free_ioctx_users(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, users);
        struct aio_kiocb *req;
+       LIST_HEAD(list);
 
        spin_lock_irq(&ctx->ctx_lock);
-
        while (!list_empty(&ctx->active_reqs)) {
                req = list_first_entry(&ctx->active_reqs,
                                       struct aio_kiocb, ki_list);
-               kiocb_cancel(req);
-       }
 
+               if (req->flags & AIO_IOCB_DELAYED_CANCEL) {
+                       req->flags |= AIO_IOCB_CANCELLED;
+                       list_move_tail(&req->ki_list, &list);
+               } else {
+                       kiocb_cancel(req);
+               }
+       }
        spin_unlock_irq(&ctx->ctx_lock);
 
+       while (!list_empty(&list)) {
+               req = list_first_entry(&list, struct aio_kiocb, ki_list);
+               kiocb_cancel(req);
+       }
+
        percpu_ref_kill(&ctx->reqs);
        percpu_ref_put(&ctx->reqs);
 }
@@ -1785,15 +1806,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
        if (unlikely(!ctx))
                return -EINVAL;
 
-       spin_lock_irq(&ctx->ctx_lock);
+       ret = -EINVAL;
 
+       spin_lock_irq(&ctx->ctx_lock);
        kiocb = lookup_kiocb(ctx, iocb, key);
+       if (kiocb) {
+               if (kiocb->flags & AIO_IOCB_DELAYED_CANCEL) {
+                       kiocb->flags |= AIO_IOCB_CANCELLED;
+               } else {
+                       ret = kiocb_cancel(kiocb);
+                       kiocb = NULL;
+               }
+       }
+       spin_unlock_irq(&ctx->ctx_lock);
+
        if (kiocb)
                ret = kiocb_cancel(kiocb);
-       else
-               ret = -EINVAL;
-
-       spin_unlock_irq(&ctx->ctx_lock);
 
        if (!ret) {
                /*
@@ -1805,7 +1833,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
        }
 
        percpu_ref_put(&ctx->users);
-
        return ret;
 }
 
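Also not part of the patch, only a reading aid: for an iocb flagged
AIO_IOCB_DELAYED_CANCEL, both free_ioctx_users() and io_cancel() above now
end up doing roughly the following; the waitqueue lock mentioned in the
comment is an assumption about what the upcoming poll code will take.

	spin_lock_irq(&ctx->ctx_lock);
	/* mark the iocb as cancelled while still under the lock */
	req->flags |= AIO_IOCB_CANCELLED;
	/* free_ioctx_users() also moves req off ctx->active_reqs here */
	spin_unlock_irq(&ctx->ctx_lock);

	/*
	 * The cancel callback only runs here, with ctx->ctx_lock dropped, so
	 * it is free to take e.g. a waitqueue lock and complete the iocb
	 * inline without reversing the lock order.
	 */
	ret = kiocb_cancel(req);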
-- 
2.14.2
