The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=2dde3bed347ab46685d079c2fcc1dbd1fa149286
commit 2dde3bed347ab46685d079c2fcc1dbd1fa149286 Author: Mark Johnston <ma...@freebsd.org> AuthorDate: 2025-07-29 14:46:53 +0000 Commit: Mark Johnston <ma...@freebsd.org> CommitDate: 2025-07-31 16:51:49 +0000 aio: Fix a race in sys_aio_cancel() sys_aio_cancel() loops over pending jobs for the process, cancelling some of them. To cancel a job with a cancel callback, it must drop the job list mutex. It uses flags, KAIOCB_CANCELLING and KAIOCB_CANCELLED, to make sure that a job isn't double-cancelled. However, when iterating over the list it uses TAILQ_FOREACH_SAFE and thus assumes that the next job isn't going to be removed while the lock is dropped. Of course, this assumption is false. We could simply start search from the beginning after cancelling a job, but that might be quite expensive. Instead, introduce the notion of a marker job, used to keep track of one's position in the queue. Use it in sys_aio_cancel() to resume iteration after a job is cancelled. Reported by: syzkaller Reviewed by: kib, jhb MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D51626 --- sys/kern/vfs_aio.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 02973146068d..e63fa4c01434 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -222,6 +222,7 @@ typedef struct oaiocb { #define KAIOCB_CHECKSYNC 0x08 #define KAIOCB_CLEARED 0x10 #define KAIOCB_FINISHED 0x20 +#define KAIOCB_MARKER 0x40 /* ioflags */ #define KAIOCB_IO_FOFFSET 0x01 @@ -584,6 +585,12 @@ aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job) int cancelled; AIO_LOCK_ASSERT(ki, MA_OWNED); + + /* + * If we're running down the queue, the process must be single-threaded, + * and so no markers should be present. 
+ */ + MPASS((job->jobflags & KAIOCB_MARKER) == 0); if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED)) return (0); MPASS((job->jobflags & KAIOCB_CANCELLING) == 0); @@ -658,7 +665,7 @@ restart: } /* Wait for all running I/O to be finished */ - if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { + if (!TAILQ_EMPTY(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { ki->kaio_flags |= KAIO_WAKEUP; msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz); goto restart; } @@ -1804,6 +1811,8 @@ aio_queue_file(struct file *fp, struct kaiocb *job) } else if (job->uaiocb.aio_lio_opcode & LIO_SYNC) { AIO_LOCK(ki); TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { + if ((job2->jobflags & KAIOCB_MARKER) != 0) + continue; if (job2->fd_file == job->fd_file && ((job2->uaiocb.aio_lio_opcode & LIO_SYNC) == 0) && job2->seqno < job->seqno) { @@ -2033,7 +2042,7 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) { struct proc *p = td->td_proc; struct kaioinfo *ki; - struct kaiocb *job, *jobn; + struct kaiocb *job, *jobn, marker; struct file *fp; int error; int cancelled = 0; @@ -2058,16 +2067,30 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) } } + /* + * We may have to drop the list mutex in order to cancel a job. After + * that point it is unsafe to rely on the stability of the list. We + * could restart the search from the beginning after canceling a job, + * but this may be inefficient. Instead, use a marker job to keep our + * place in the list.
+ */ + memset(&marker, 0, sizeof(marker)); + marker.jobflags = KAIOCB_MARKER; + AIO_LOCK(ki); TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { - if ((uap->fd == job->uaiocb.aio_fildes) && - ((uap->aiocbp == NULL) || - (uap->aiocbp == job->ujob))) { + if (uap->fd == job->uaiocb.aio_fildes && + (uap->aiocbp == NULL || uap->aiocbp == job->ujob) && + (job->jobflags & KAIOCB_MARKER) == 0) { + TAILQ_INSERT_AFTER(&ki->kaio_jobqueue, job, &marker, + plist); if (aio_cancel_job(p, ki, job)) { cancelled++; } else { notcancelled++; } + jobn = TAILQ_NEXT(&marker, plist); + TAILQ_REMOVE(&ki->kaio_jobqueue, &marker, plist); if (uap->aiocbp != NULL) break; }