Re: panic witness_warn

2018-07-04 Thread Kenneth R Westerback
On Wed, Jul 04, 2018 at 05:48:21PM +0200, Alexander Bluhm wrote:
> On Mon, Jun 04, 2018 at 08:53:49PM +0200, Alexander Bluhm wrote:
> > userret: returning with the following locks held:
> > exclusive rrwlock inode r = 0 (0xff023d492b48) locked @ 
> > /usr/src/sys/ufs/uf
> > s/ufs_vnops.c:1559
> > #0  witness_lock+0x254
> > #1  _rw_enter+0x29b
> > #2  _rrw_enter+0x3e
> > #3  VOP_LOCK+0x3d
> > #4  vn_lock+0x34
> > #5  vget+0xf7
> > #6  cache_lookup+0x217
> > #7  ufs_lookup+0x112
> > #8  VOP_LOOKUP+0x4f
> > #9  vfs_lookup+0x27e
> > #10 namei+0x226
> > #11 vn_open+0xcf
> > #12 doopenat+0x1af
> > #13 syscall+0x32a
> > #14 Xsyscall_untramp+0xc0
> > panic: witness_warn
> 
> We are leaking a vnode in namei().  The mount check was copied from
> NetBSD in 2003.  Later in 2006 they added a vput().  As we have not
> locked the directory vnode, call vrele() there.
> 
> ok?
> 
> bluhm
> 
> Index: kern/vfs_lookup.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/vfs_lookup.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 vfs_lookup.c
> --- kern/vfs_lookup.c 2 May 2018 02:24:56 -   1.69
> +++ kern/vfs_lookup.c 4 Jul 2018 10:44:48 -
> @@ -203,6 +203,8 @@ fail:
>   if (!dp->v_mount) {
>   /* Give up if the directory is no longer mounted */
>   pool_put(_pool, cnp->cn_pnbuf);
> + vrele(dp);
> + ndp->ni_vp = NULL;
>   return (ENOENT);
>   }
>   cnp->cn_nameptr = cnp->cn_pnbuf;
> 

Makes sense to me. ok krw@ fwiw.

 Ken



Re: panic witness_warn

2018-07-04 Thread Alexander Bluhm
On Mon, Jun 04, 2018 at 08:53:49PM +0200, Alexander Bluhm wrote:
> userret: returning with the following locks held:
> exclusive rrwlock inode r = 0 (0xff023d492b48) locked @ 
> /usr/src/sys/ufs/uf
> s/ufs_vnops.c:1559
> #0  witness_lock+0x254
> #1  _rw_enter+0x29b
> #2  _rrw_enter+0x3e
> #3  VOP_LOCK+0x3d
> #4  vn_lock+0x34
> #5  vget+0xf7
> #6  cache_lookup+0x217
> #7  ufs_lookup+0x112
> #8  VOP_LOOKUP+0x4f
> #9  vfs_lookup+0x27e
> #10 namei+0x226
> #11 vn_open+0xcf
> #12 doopenat+0x1af
> #13 syscall+0x32a
> #14 Xsyscall_untramp+0xc0
> panic: witness_warn

We are leaking a vnode in namei().  The mount check was copied from
NetBSD in 2003.  Later in 2006 they added a vput().  As we have not
locked the directory vnode, call vrele() there.

ok?

bluhm

Index: kern/vfs_lookup.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.69
diff -u -p -r1.69 vfs_lookup.c
--- kern/vfs_lookup.c   2 May 2018 02:24:56 -   1.69
+++ kern/vfs_lookup.c   4 Jul 2018 10:44:48 -
@@ -203,6 +203,8 @@ fail:
if (!dp->v_mount) {
/* Give up if the directory is no longer mounted */
pool_put(_pool, cnp->cn_pnbuf);
+   vrele(dp);
+   ndp->ni_vp = NULL;
return (ENOENT);
}
cnp->cn_nameptr = cnp->cn_pnbuf;



Re: panic witness_warn

2018-06-04 Thread Alexander Bluhm
On Mon, Jun 04, 2018 at 01:23:53AM +0200, Alexander Bluhm wrote:
> On my amd64 regress machine I see a witness_warn panic now.

It is triggered by /usr/src/regress/sys/kern/mount.  With visa@'s
improved stack trace I see:

 run-regress-unmount-busy 
cp -r /usr /mnt/regress-mount &
sleep 5
sync
sleep 10
rm -rf /mnt/regress-mount/usr &
sleep .1
umount -f /mnt/regress-mount
userret: returning with the following locks helfsck -y /dev/rvnd:d0a 2>&1 | tee 
f
esck-clean.log
xclusive rrwlock inode r = 0 (0xff023d492b48) locked @ 
/usr/src/sys/ufs/ufs/ufs_vnops.c:1559
#0  witness_lock+0x254
#1  _rw_enter+0x29b
#2  _rrw_enrm: terfts_read: +0x3e
#3  VOP_LOCK+0x3d
#4  vn_lock+0x34
#5  vget+0xf7
#6  cache_lookup+0x217
#7  ufs_lookup+0x112
#8  VOP_LOOKUP+0x4f
#9  vfs_lookup+0x27e
#directory  No such file or 
10 namei+0x226
#11 vn_open+0xcf
#12 doopenat+0x1af
#13 syscall+0x32a
#14 Xsyscall_untramp+0xc0
panic: witness_warn
Stopped at  db_enter+0x12:  popq%r11
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
*340900  45836  00x13  02  cp
db_enter() at db_enter+0x12
panic() at panic+0x138
witness_warn(4daf31f665ef77b2,800021f74290,80006960) at witness_war
n+0x4fb
userret(184acb3033f01432) at userret+0x1a0
syscall(e44e1a254de07323) at syscall+0x423
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7d19b0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{2}> show locks
exclusive rrwlock inode r = 0 (0xff023d492b48) locked @ /usr/src/sys/ufs/uf
s/ufs_vnops.c:1559
#0  witness_lock+0x254
#1  _rw_enter+0x29b
#2  _rrw_enter+0x3e
#3  VOP_LOCK+0x3d
#4  vn_lock+0x34
#5  vget+0xf7
#6  cache_lookup+0x217
#7  ufs_lookup+0x112
#8  VOP_LOOKUP+0x4f
#9  vfs_lookup+0x27e
#10 namei+0x226
#11 vn_open+0xcf
#12 doopenat+0x1af
#13 syscall+0x32a
#14 Xsyscall_untramp+0xc0
ddb{2}> show register
rdi   0x81ca2888kprintf_mutex+0x10
rsi  0x8
rbp   0x800021f740e0
rbx   0x800021f74180
rdx   0x81bfddd6intel_hpll_vco.cl_vco+0x3e6
rcx0x201
rax  0x1
r80x800021f740b0
r90x8080808080808080
r10   0x3df1125987f3e8ca
r11   0x1a70d75cd6a8e458
r12 0x38
r13   0x800021f740f0
r140x100
r15   0x81c0463fnviic_ids+0x7df
rip   0x81767d82db_enter+0x12
cs   0x8
rflags 0x202
rsp   0x800021f740d8
ss  0x10
db_enter+0x12:  popq%r11
ddb{2}> 

All other CPUs are in the idle thread.

bluhm



panic witness_warn

2018-06-03 Thread Alexander Bluhm
Hi,

On my amd64 regress machine I see a witness_warn panic now.

OpenBSD 6.3-current (GENERIC.MP) #65: Fri Jun  1 17:44:06 MDT 2018
dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP

acquiring duplicate lock of same type: ">mnt_lock"
 1st vfslock @ /usr/src/sys/kern/vfs_subr.c:191
 2nd vfslock @ /usr/src/sys/kern/vfs_subr.c:191
Starting stack trace...
witness_checkorder(9,819de278,bf,806c7840,21) at 
witness_checkorder+0x63d
_rw_enter(0,806c7840,806c7800,8) at _rw_enter+0x56
vfs_busy(800022050180,806ca400) at vfs_busy+0x66
dounmount(800022050180,8000ffef1c30,1) at dounmount+0x8a
sys_unmount(160,8000ffef1c30,16) at sys_unmount+0xba
syscall(7f7e7c20) at syscall+0x31d
Xsyscall_untramp(6,0,0,0,0,16) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7e8060, count: 250
End of stack trace.
userret: returning with the following locks held:
exclusive rrwlock inode r = 0 (0xff0241320f80) locked @ 
/usr/src/sys/ufs/ufs/ufs_vnops.c:1559
papnaicn:i cw:i tkneersnse_lw adrina
gStopped at  db_enter+0x5:   popq%rbp
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
*397087   5443  00x12  00  cp
 174469  59263 730x100010  02K syslogd
 157117  56285  0 0x14000  0x2001  reaper
db_enter() at db_enter+0x5
panic() at panic+0x138
witness_warn(8000ffa89690,8000dbc8,5) at witness_warn+0x47e
userret(8000dbc8) at userret+0x190
syscall(1a3e4b16d780) at syscall+0x416
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7f2dd0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}> trace
db_enter() at db_enter+0x5
panic() at panic+0x138
witness_warn(8000ffa89690,8000dbc8,5) at witness_warn+0x47e
userret(8000dbc8) at userret+0x190
syscall(1a3e4b16d780) at syscall+0x416
Xsyscall_untramp(6,0,0,0,0,2) at Xsyscall_untramp+0xc0
end of kernel
end trace frame: 0x7f7f2dd0, count: -6
ddb{0}> show panic
witness_warn
ddb{0}> show register
rdi   0x81ba4030kprintf_mutex+0x10
rsi  0x8
rbp   0x8000ffa894e0
rbx   0x8000ffa89580
rdx   0x819ce073dll_speed_table+0x9d3
rcx0x201
rax  0x1
r80x8000ffa894b0
r90x8080808080808080
r10   0x8000ffa894d8
r11   0x81656ca0x86_bus_space_io_read_1
r12 0x38
r13   0x8000ffa894f0
r140x100
r15   0x819fcd59i830_bc_wm_info+0x281
rip   0x815d5ac5db_enter+0x5
cs   0x8
rflags 0x202
rsp   0x8000ffa894e0
ss  0x10
db_enter+0x5:   popq%rbp
ddb{0}> ps
   PID TID   PPIDUID  S   FLAGS  WAIT  COMMAND
* 5443  397087  1  0  70x12cp
 53242  267775  96119  0  30x10008a  pause make
 961191926  63816  0  30x10008a  pause sh
 63816  148879  54466  0  30x10008a  pause make
  3454   96597  75594  0  30x100082  piperdgzip
 75594  102367  54466  0  30x100082  piperdpax
 54466   56520  51132  0  30x82  piperdperl
 51132  108585  75657  0  30x10008a  pause ksh
 75657  329342  25504  0  30x92  selectsshd
 90541   67298  1  0  30x100083  ttyin getty
 67781  180626  1  0  30x100083  ttyin getty
 24187   77492  1  0  30x100083  ttyin getty
   918  224848  1  0  30x100083  ttyin getty
 13705  513022  1  0  30x100083  ttyin getty
 65400  130475  1  0  30x100083  ttyin getty
 57752   49491  1  0  30x100098  poll  cron
 18699  445424  0  0  3 0x14280  nfsidlnfsio
 28801  439290  0  0  3 0x14280  nfsidlnfsio
 74268  153048  0  0  3 0x14280  nfsidlnfsio
 32886  255688  0  0  3 0x14280  nfsidlnfsio
 21303  398601  1 99  30x100090  poll  sndiod
 16832  417143  1110  30x100090  poll  sndiod
 16311  337353  72178 95  30x100092  kqreadsmtpd
 71318   47528  72178103  30x100092  kqreadsmtpd
 96699   35402  72178 95  30x100092  kqreadsmtpd
 29558  384597  72178 95  30x100092  kqreadsmtpd
 52970  409419  72178 95  30x100092  kqreadsmtpd
  6162  119654  72178 95  30x100092  kqreadsmtpd
 72