On Thu, Feb 06, 2020 at 04:31:47PM -0800, Rob Newberry wrote:
> Hi.
>
> I spent last weekend -- and a few days this week -- tracking down a problem
> that exists in current.
> I found a workaround, but I don't know what the "proper" fix is.
> Digging through the VM layer and debugging with printfs was slow --
> and it's a boot-time issue, so I had to swap a lot of SD cards back and forth
> :-).
> Hopefully someone here is better at this than me.
>
>
> [analysis...]
good job working your way through all that, this code is pretty complicated.
> 3) Start "aiodone_queue" earlier in the sequence. I don't have a rich enough
> understanding of
> this part of the kernel and user land startup process to know how hard this
> is, or how hacky it is.
this is the right way to fix it. please try the attached patch.
> BTW, I'm ASSUMING that if uvm.aiodone_queue were present, the asynchronous
> completion would somehow
> handle marking the pages as "not busy". But I actually never debugged that
> code path,
> so I can't be sure that's helpful.
right, the "aiodone_queue" workqueue will call uvm_aiodone_worker() on the
buffer,
and bp->b_iodone will have been set to uvm_aio_aiodone, which unbusies the pages
among other things.
-Chuck
Index: src/sys/kern/init_main.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/kern/init_main.c,v
retrieving revision 1.519
diff -u -p -r1.519 init_main.c
--- src/sys/kern/init_main.c 28 Jan 2020 16:35:39 -0000 1.519
+++ src/sys/kern/init_main.c 7 Feb 2020 18:14:06 -0000
@@ -655,6 +655,22 @@ main(void)
sysctl_finalize();
+ /* Create the aiodone daemon kernel thread. */
+ if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
+ uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
+ panic("fork aiodoned");
+
+ /* Create the pageout daemon kernel thread. */
+ uvm_swap_init();
+ if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout,
+ NULL, NULL, "pgdaemon"))
+ panic("fork pagedaemon");
+
+ /* Create the filesystem syncer kernel thread. */
+ if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync,
+ NULL, NULL, "ioflush"))
+ panic("fork syncer");
+
/*
* Now that autoconfiguration has completed, we can determine
* the root and dump devices.
@@ -709,22 +725,6 @@ main(void)
ci->ci_schedstate.spc_lastmod = time_second;
}
- /* Create the pageout daemon kernel thread. */
- uvm_swap_init();
- if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout,
- NULL, NULL, "pgdaemon"))
- panic("fork pagedaemon");
-
- /* Create the filesystem syncer kernel thread. */
- if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync,
- NULL, NULL, "ioflush"))
- panic("fork syncer");
-
- /* Create the aiodone daemon kernel thread. */
- if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
- uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
- panic("fork aiodoned");
-
/* Wait for final configure threads to complete. */
config_finalize_mountroot();
Index: src/sys/miscfs/genfs/genfs_io.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/miscfs/genfs/genfs_io.c,v
retrieving revision 1.84
diff -u -p -r1.84 genfs_io.c
--- src/sys/miscfs/genfs/genfs_io.c 15 Jan 2020 17:55:44 -0000 1.84
+++ src/sys/miscfs/genfs/genfs_io.c 7 Feb 2020 18:22:31 -0000
@@ -606,9 +606,6 @@ genfs_getpages_read(struct vnode *vp, st
if (kva == 0)
return EBUSY;
- if (uvm.aiodone_queue == NULL)
- async = 0;
-
mbp = getiobuf(vp, true);
mbp->b_bufsize = totalbytes;
mbp->b_data = (void *)kva;
@@ -1396,7 +1393,6 @@ genfs_gop_write(struct vnode *vp, struct
UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
len = npages << PAGE_SHIFT;
- KASSERT(uvm.aiodone_queue != NULL);
error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
uvm_aio_biodone);
@@ -1429,7 +1425,6 @@ genfs_gop_write_rwmap(struct vnode *vp,
UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
len = npages << PAGE_SHIFT;
- KASSERT(uvm.aiodone_queue != NULL);
error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE,
uvm_aio_biodone);
Index: src/sys/uvm/uvm_swap.c
===================================================================
RCS file: /home/chs/netbsd/cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.185
diff -u -p -r1.185 uvm_swap.c
--- src/sys/uvm/uvm_swap.c 27 Dec 2019 09:41:51 -0000 1.185
+++ src/sys/uvm/uvm_swap.c 7 Feb 2020 18:26:49 -0000
@@ -1778,10 +1778,6 @@ uvm_swap_io(struct vm_page **pps, int st
write = (flags & B_READ) == 0;
async = (flags & B_ASYNC) != 0;
- /* XXX swap io make take place before the aiodone queue exists */
- if (uvm.aiodone_queue == NULL)
- async = 0;
-
/*
* allocate a buf for the i/o.
*/
@@ -1836,7 +1832,6 @@ uvm_swap_io(struct vm_page **pps, int st
*/
if (async) {
- KASSERT(uvm.aiodone_queue != NULL);
bp->b_iodone = uvm_aio_biodone;
UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
if (curlwp == uvm.pagedaemon_lwp)