The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=2dde3bed347ab46685d079c2fcc1dbd1fa149286
commit 2dde3bed347ab46685d079c2fcc1dbd1fa149286 Author: Mark Johnston <ma...@freebsd.org> AuthorDate: 2025-07-29 14:46:53 +0000 Commit: Mark Johnston <ma...@freebsd.org> CommitDate: 2025-07-31 16:51:49 +0000 aio: Fix a race in sys_aio_cancel() sys_aio_cancel() loops over pending jobs for the process, cancelling some of them. To cancel a job with a cancel callback, it must drop the job list mutex. It uses flags, KAIOCB_CANCELLING and KAIOCB_CANCELLED, to make sure that a job isn't double-cancelled. However, when iterating over the list it uses TAILQ_FOREACH_SAFE and thus assumes that the next job isn't going to be removed while the lock is dropped. Of course, this assumption is false. We could simply start search from the beginning after cancelling a job, but that might be quite expensive. Instead, introduce the notion of a marker job, used to keep track of one's position in the queue. Use it in sys_aio_cancel() to resume iteration after a job is cancelled. Reported by: syzkaller Reviewed by: kib, jhb MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D51626 --- sys/kern/vfs_aio.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 02973146068d..e63fa4c01434 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -222,6 +222,7 @@ typedef struct oaiocb { #define KAIOCB_CHECKSYNC 0x08 #define KAIOCB_CLEARED 0x10 #define KAIOCB_FINISHED 0x20 +#define KAIOCB_MARKER 0x40 /* ioflags */ #define KAIOCB_IO_FOFFSET 0x01 @@ -584,6 +585,12 @@ aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job) int cancelled; AIO_LOCK_ASSERT(ki, MA_OWNED); + + /* + * If we're running down the queue, the process must be single-threaded, + * and so no markers should be present. 
+ */ + MPASS((job->jobflags & KAIOCB_MARKER) == 0); if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED)) return (0); MPASS((job->jobflags & KAIOCB_CANCELLING) == 0); @@ -658,7 +665,7 @@ restart: } /* Wait for all running I/O to be finished */ - if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { + if (!TAILQ_EMPTY(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { ki->kaio_flags |= KAIO_WAKEUP; msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz); goto restart; } @@ -1804,6 +1811,8 @@ aio_queue_file(struct file *fp, struct kaiocb *job) } else if (job->uaiocb.aio_lio_opcode & LIO_SYNC) { AIO_LOCK(ki); TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { + if ((job2->jobflags & KAIOCB_MARKER) != 0) + continue; if (job2->fd_file == job->fd_file && ((job2->uaiocb.aio_lio_opcode & LIO_SYNC) == 0) && job2->seqno < job->seqno) { @@ -2033,7 +2042,7 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) { struct proc *p = td->td_proc; struct kaioinfo *ki; - struct kaiocb *job, *jobn; + struct kaiocb *job, *jobn, marker; struct file *fp; int error; int cancelled = 0; @@ -2058,16 +2067,30 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) } } + /* + * We may have to drop the list mutex in order to cancel a job. After + * that point it is unsafe to rely on the stability of the list. We + * could restart the search from the beginning after canceling a job, + * but this may be inefficient. Instead, use a marker job to keep our + * place in the list.
+ */ + memset(&marker, 0, sizeof(marker)); + marker.jobflags = KAIOCB_MARKER; + AIO_LOCK(ki); TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { - if ((uap->fd == job->uaiocb.aio_fildes) && - ((uap->aiocbp == NULL) || - (uap->aiocbp == job->ujob))) { + if (uap->fd == job->uaiocb.aio_fildes && + (uap->aiocbp == NULL || uap->aiocbp == job->ujob) && + (job->jobflags & KAIOCB_MARKER) == 0) { + TAILQ_INSERT_AFTER(&ki->kaio_jobqueue, job, &marker, + plist); if (aio_cancel_job(p, ki, job)) { cancelled++; } else { notcancelled++; } + jobn = TAILQ_NEXT(&marker, plist); + TAILQ_REMOVE(&ki->kaio_jobqueue, &marker, plist); if (uap->aiocbp != NULL) break; }