On 29/07/22(Fri) 14:22, Theo Buehler wrote:
> On Mon, Jul 11, 2022 at 01:05:19PM +0200, Martin Pieuchot wrote:
> > On 11/07/22(Mon) 07:50, Theo Buehler wrote:
> > > On Fri, Jun 03, 2022 at 03:02:36PM +0200, Theo Buehler wrote:
> > > > > Please do note that this change can introduce/expose other issues.
> > > >
> > > > It seems that this diff causes occasional hangs when building snapshots
> > > > on my mac M1 mini. This happened twice in 10 builds, both times in
> > > > xenocara. Unfortunately, both times the machine became entirely
> > > > unresponsive and as I don't have serial console, that's all the info I
> > > > have...
> > > >
> > > > This machine has been very reliable and built >50 snaps without any hang
> > > > over the last 2.5 months. I'm now trying snap builds in a loop without
> > > > the diff to make sure the machine doesn't hang due to another recent
> > > > kernel change.
> > > >
> > >
> > > A little bit of info on this. The first three lines were a bit garbled on
> > > the screen:
> > >
> > > panic: kernel diagnostic assertion "uvn->_oppa jai c: ke r
> > > el d iag no tic a s rt n " map == UL L | | rw wr
> > > k
> > > ite held(amap->amap_lock)" failed: file "/ss/uvm/uvm_fault.c", line 846.
> > > ernel diagnostic assertion "!_kernel_lock_held
> > > Stopped at panic+0160: cmp w21, #0x0 ailed: file "/sys/kern/
> > > TID PID UID PRFLAGS PFLAGS CPU COMMAND
> > > 411910 44540 21 0x100001 0 3 make
> > > *436444 84241 21 0x100003 0 6 sh
> > > 227952 53498 21 0x100003 0 5 sh
> > > 258925 15765 21 0x101005 0 0 make
> > > 128459 9649 21 0x100003 0 1 tradcpp
> > > 287213 64216 21 0x100003 0x8 7 make
> > > 173587 4617 1000 0x100000 0 2 tmux
> > > 126511 69919 0 0x14000 0x200 4 softnet
> > > db_enter() at panic+0x15c
> > > panic() at __assert+0x24
> > > uvm_fault() at uvm_fault_upper_lookup+0x258
> > > uvm_fault_upper() at uvm_fault+0xec
> > > uvm_fault() at udata_abort+0x128
> > > udata_abort() at do_el0_sync+0xdc
> > > do_el0_sync() at handle_el0_sync+0x74
> > > https://www.openbsd.org/ddb.html describes the minimum info required in
> > > bug
> > > reports. Insufficient info makes it difficult to find and fix bugs.
> > > ddb{6}> show panic
> > > *cpu0: kernel diagnostic assertion "uvn->u_obj.uo_refs == 0" failed: file
> > > "/sys/kern/uvn_vnode.c", line 231.
> > > cpu6: kernel diagnostic assertion "amap == NULL ||
> > > rw_write_held(amap->am_lock)" failed: file "/sys/uvm/uvm_fault", line
> > > 846.
> > > cpu3: kernel diagnostic assertion "!_kernel_lock_held()" failed: file
> > > "/sys/kern/kern_fork.c", line 678
> > > ddb{6}> mach ddbcpu 0
> > >
> > > After pressing enter here, the machine locked up completely.
> >
> > It's hard for me to tell what's going on. I believe the interesting
> > trace is the one from cpu0 that we don't have. Can you easily reproduce
> > this? I'm trying on amd64 without luck. I'd be glad if you could gather
> > more infos.
>
> Sorry for the delay. I was only at home intermittently. I hit this three
> times:
>
> panic: kernel diagnostic assertion "uvn->u_obj.uo_refs == 0" failed: file
> "/sys/uvm/uvm_vnode.c", line 231
Yesterday gnezdo@ fixed a race in uvn_attach() that led to the same
assert. Here's a rebased diff for the bug discussed in this thread,
could you try again and let us know? Thanks!
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.127
diff -u -p -r1.127 uvm_vnode.c
--- uvm/uvm_vnode.c 31 Aug 2022 09:07:35 -0000 1.127
+++ uvm/uvm_vnode.c 1 Sep 2022 12:54:27 -0000
@@ -163,11 +163,8 @@ uvn_attach(struct vnode *vp, vm_prot_t a
*/
rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */
+ KASSERT(uvn->u_obj.uo_refs > 0);
- /* regain vref if we were persisting */
- if (uvn->u_obj.uo_refs == 0) {
- vref(vp);
- }
uvn->u_obj.uo_refs++; /* bump uvn ref! */
rw_exit(uvn->u_obj.vmobjlock);
@@ -235,7 +232,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
KASSERT(uvn->u_obj.uo_refs == 0);
uvn->u_obj.uo_refs++;
oldflags = uvn->u_flags;
- uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
+ uvn->u_flags = UVM_VNODE_VALID;
uvn->u_nio = 0;
uvn->u_size = used_vnode_size;
@@ -248,7 +245,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
/*
* add a reference to the vnode. this reference will stay as long
* as there is a valid mapping of the vnode. dropped when the
- * reference count goes to zero [and we either free or persist].
+ * reference count goes to zero.
*/
vref(vp);
if (oldflags & UVM_VNODE_WANTED)
@@ -321,16 +318,6 @@ uvn_detach(struct uvm_object *uobj)
*/
vp->v_flag &= ~VTEXT;
- /*
- * we just dropped the last reference to the uvn. see if we can
- * let it "stick around".
- */
- if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
- /* won't block */
- uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
- goto out;
- }
-
/* its a goner! */
uvn->u_flags |= UVM_VNODE_DYING;
@@ -380,7 +367,6 @@ uvn_detach(struct uvm_object *uobj)
/* wake up any sleepers */
if (oldflags & UVM_VNODE_WANTED)
wakeup(uvn);
-out:
rw_exit(uobj->vmobjlock);
/* drop our reference to the vnode. */
@@ -496,8 +482,8 @@ uvm_vnp_terminate(struct vnode *vp)
}
/*
- * done. now we free the uvn if its reference count is zero
- * (true if we are zapping a persisting uvn). however, if we are
+ * done. now we free the uvn if its reference count is zero.
+ * however, if we are
* terminating a uvn with active mappings we let it live ... future
* calls down to the vnode layer will fail.
*/
@@ -505,14 +491,14 @@ uvm_vnp_terminate(struct vnode *vp)
if (uvn->u_obj.uo_refs) {
/*
* uvn must live on it is dead-vnode state until all references
- * are gone. restore flags. clear CANPERSIST state.
+ * are gone. restore flags.
*/
uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
- UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
+ UVM_VNODE_WANTED);
} else {
/*
* free the uvn now. note that the vref reference is already
- * gone [it is dropped when we enter the persist state].
+ * gone.
*/
if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
panic("uvm_vnp_terminate: io sync wanted bit set");
@@ -1349,46 +1335,14 @@ uvm_vnp_uncache(struct vnode *vp)
}
/*
- * we have a valid, non-blocked uvn. clear persist flag.
+ * we have a valid, non-blocked uvn.
* if uvn is currently active we can return now.
*/
- uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
if (uvn->u_obj.uo_refs) {
rw_exit(uobj->vmobjlock);
return FALSE;
}
- /*
- * uvn is currently persisting! we have to gain a reference to
- * it so that we can call uvn_detach to kill the uvn.
- */
- vref(vp); /* seems ok, even with VOP_LOCK */
- uvn->u_obj.uo_refs++; /* value is now 1 */
- rw_exit(uobj->vmobjlock);
-
-#ifdef VFSLCKDEBUG
- /*
- * carry over sanity check from old vnode pager: the vnode should
- * be VOP_LOCK'd, and we confirm it here.
- */
- if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
- panic("uvm_vnp_uncache: vnode not locked!");
-#endif
-
- /*
- * now drop our reference to the vnode. if we have the sole
- * reference to the vnode then this will cause it to die [as we
- * just cleared the persist flag]. we have to unlock the vnode
- * while we are doing this as it may trigger I/O.
- *
- * XXX: it might be possible for uvn to get reclaimed while we are
- * unlocked causing us to return TRUE when we should not. we ignore
- * this as a false-positive return value doesn't hurt us.
- */
- VOP_UNLOCK(vp);
- uvn_detach(&uvn->u_obj);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
return TRUE;
}
@@ -1483,11 +1437,9 @@ uvm_vnp_sync(struct mount *mp)
continue;
/*
- * gain reference. watch out for persisting uvns (need to
- * regain vnode REF).
+ * gain reference.
*/
- if (uvn->u_obj.uo_refs == 0)
- vref(vp);
+ KASSERT(uvn->u_obj.uo_refs > 0);
uvn->u_obj.uo_refs++;
SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);