On Thu, Feb 06, 2020 at 04:31:47PM -0800, Rob Newberry wrote:
> Hi.
> 
> I spent last weekend -- and a few days this week -- tracking down a problem 
> that exists in current.
> I found a workaround, but I don't know what the "proper" fix is.
> Digging through the VM layer and debugging with printfs was slow --
> and it's a boot-time issue, so I had to swap a lot of SD cards back and forth 
> :-).
> Hopefully someone here is better at this than me.
> 
> 
> [analysis...]

good job working your way through all that, this code is pretty complicated.


> 3) Start "aiodone_queue" earlier in the sequence.  I don't have a rich enough 
> understanding of
> this part of the kernel and user land startup process to know how hard this 
> is, or how hacky it is.

this is the right way to fix it.  please try the attached patch.


> BTW, I'm ASSUMING that if uvm.aiodone_queue were present, the asynchronous 
> completion would somehow
> handle marking the pages as "not busy".  But I actually never debugged that 
> code path,
> so I can't be sure that's helpful.

right, the "aiodone_queue" workqueue will call uvm_aiodone_worker() on the 
buffer,
and bp->b_iodone will have been set to uvm_aio_aiodone, which unbusies the pages
among other things.

-Chuck
Index: src/sys/kern/init_main.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/kern/init_main.c,v
retrieving revision 1.519
diff -u -p -r1.519 init_main.c
--- src/sys/kern/init_main.c    28 Jan 2020 16:35:39 -0000      1.519
+++ src/sys/kern/init_main.c    7 Feb 2020 18:14:06 -0000
@@ -655,6 +655,22 @@ main(void)
 
        sysctl_finalize();
 
+       /* Create the aiodone daemon kernel thread. */
+       if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
+           uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
+               panic("fork aiodoned");
+
+       /* Create the pageout daemon kernel thread. */
+       uvm_swap_init();
+       if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout,
+           NULL, NULL, "pgdaemon"))
+               panic("fork pagedaemon");
+
+       /* Create the filesystem syncer kernel thread. */
+       if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync,
+           NULL, NULL, "ioflush"))
+               panic("fork syncer");
+
        /*
         * Now that autoconfiguration has completed, we can determine
         * the root and dump devices.
@@ -709,22 +725,6 @@ main(void)
                ci->ci_schedstate.spc_lastmod = time_second;
        }
 
-       /* Create the pageout daemon kernel thread. */
-       uvm_swap_init();
-       if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout,
-           NULL, NULL, "pgdaemon"))
-               panic("fork pagedaemon");
-
-       /* Create the filesystem syncer kernel thread. */
-       if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync,
-           NULL, NULL, "ioflush"))
-               panic("fork syncer");
-
-       /* Create the aiodone daemon kernel thread. */
-       if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
-           uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
-               panic("fork aiodoned");
-
        /* Wait for final configure threads to complete. */
        config_finalize_mountroot();
 
Index: src/sys/miscfs/genfs/genfs_io.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/miscfs/genfs/genfs_io.c,v
retrieving revision 1.84
diff -u -p -r1.84 genfs_io.c
--- src/sys/miscfs/genfs/genfs_io.c     15 Jan 2020 17:55:44 -0000      1.84
+++ src/sys/miscfs/genfs/genfs_io.c     7 Feb 2020 18:22:31 -0000
@@ -606,9 +606,6 @@ genfs_getpages_read(struct vnode *vp, st
        if (kva == 0)
                return EBUSY;
 
-       if (uvm.aiodone_queue == NULL)
-               async = 0;
-
        mbp = getiobuf(vp, true);
        mbp->b_bufsize = totalbytes;
        mbp->b_data = (void *)kva;
@@ -1396,7 +1393,6 @@ genfs_gop_write(struct vnode *vp, struct
            UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
        len = npages << PAGE_SHIFT;
 
-       KASSERT(uvm.aiodone_queue != NULL);
        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                            uvm_aio_biodone);
 
@@ -1429,7 +1425,6 @@ genfs_gop_write_rwmap(struct vnode *vp, 
            UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
        len = npages << PAGE_SHIFT;
 
-       KASSERT(uvm.aiodone_queue != NULL);
        error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
                            uvm_aio_biodone);
 
Index: src/sys/uvm/uvm_swap.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.185
diff -u -p -r1.185 uvm_swap.c
--- src/sys/uvm/uvm_swap.c      27 Dec 2019 09:41:51 -0000      1.185
+++ src/sys/uvm/uvm_swap.c      7 Feb 2020 18:26:49 -0000
@@ -1778,10 +1778,6 @@ uvm_swap_io(struct vm_page **pps, int st
        write = (flags & B_READ) == 0;
        async = (flags & B_ASYNC) != 0;
 
-       /* XXX swap io make take place before the aiodone queue exists */
-       if (uvm.aiodone_queue == NULL)
-               async = 0;
-
        /*
         * allocate a buf for the i/o.
         */
@@ -1836,7 +1832,6 @@ uvm_swap_io(struct vm_page **pps, int st
         */
 
        if (async) {
-               KASSERT(uvm.aiodone_queue != NULL);
                bp->b_iodone = uvm_aio_biodone;
                UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
                if (curlwp == uvm.pagedaemon_lwp)

Reply via email to