On 17.5.2021. 16:24, Alexandr Nedvedicky wrote:
> Hrvoje,
>
> managed to trigger diagnostic panics with diff [1] sent by bluhm@
> some time ago. The panic Hrvoje sees comes from ether_input() here:
> [1] https://marc.info/?l=openbsd-tech&m=161903387904923&w=2
>
> [2] https://marc.info/?l=openbsd-tech&m=162099270106067&w=2
for those interested, I sent emails to a couple of developers with the
following panics
c/p
Hi guys,
i'm testing bluhm@ parallel diff with pf and tpmr/veb/bridge.
if i'm sending traffic over tmpr or veb, the same second i enable pf,
box panic.. see attachment ..
with bridge i can't panic box, but it seems that performance is even
worse than without parallel diff ... i will play with it more, maybe
it's just some glitch ...
pf and tpmr
r620-1# panic: kernel diagnostic assertion
"curcpu()->ci_schedstate.spc_smrdepth == 0" failed: file "/sys/kern/sub
r_xxx.c", line 163
Stopped at db_enter+0x10: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
11538 95266 0 0x14000 0x200 2 softnet
239565 4172 0 0x14000 0x200 1 softnet
db_enter() at db_enter+0x10
panic(ffffffff81e0198a) at panic+0x12a
__assert(ffffffff81e6f4ee,ffffffff81e93713,a3,ffffffff81ea877c) at __assert+0x2b
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x40
sleep_finish(ffff800023877740,1) at sleep_finish+0x11c
rw_enter(ffffffff8212e900,1) at rw_enter+0x229
pf_test(2,1,ffff800000082048,ffff800023877978) at pf_test+0x1055
tpmr_pf(ffff800000082048,1,fffffd80cccb8c00) at tpmr_pf+0x7e
tpmr_input(ffff800000082048,fffffd80cccb8c00,90e2ba1d4f89,ffff800000680f00) at
tpmr_input+0x124
ether_input(ffff800000082048,fffffd80cccb8c00) at ether_input+0xf5
if_input_process(ffff800000082048,ffff800023877af8) at if_input_process+0x6f
ifiq_process(ffff800000086000) at ifiq_process+0x69
taskq_thread(ffff800000030300) at taskq_thread+0x9f
end trace frame: 0x0, count: 1
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{0}>
ddb{0}> show panic
kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth == 0" failed:
file "/sys/kern/subr_xxx.c", line 163
ddb{0}> show locks
shared rwlock netlock r = 0 (0xffffffff8212eab0)
#0 witness_lock+0x339
#1 if_input_process+0x43
#2 ifiq_process+0x69
#3 taskq_thread+0x9f
#4 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030370)
#0 witness_lock+0x339
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
exclusive sched_lock &sched_lock r = 0 (0xffffffff8230a100)
#0 witness_lock+0x339
#1 sleep_setup+0xa5
#2 rw_enter+0x208
#3 pf_test+0x1055
#4 tpmr_pf+0x7e
#5 tpmr_input+0x124
#6 ether_input+0xf5
#7 if_input_process+0x6f
#8 ifiq_process+0x69
#9 taskq_thread+0x9f
#10 proc_trampoline+0x1c
ddb{0}> show reg
rdi 0xffffffff820f1c80 kprintf_mutex
rsi 0x5
rbp 0xffff8000238775a0
rbx 0xffff8000238775b0
rdx 0x8000000000000000
rcx 0x206
rax 0x1
r8 0xffff800023877560
r9 0
r10 0
r11 0x80ef1bc6ac87b49d
r12 0x3000000008
r13 0xffff800023877650
r14 0x100
r15 0xffffffff81e0198a cmd0646_9_tim_udma+0x24a93
rip 0xffffffff81106070 db_enter+0x10
cs 0x8
rflags 0x282
rsp 0xffff8000238775a0
ss 0x10
db_enter+0x10: popq %rbp
ddb{0}> show mbuf
mbuf 0xffffffff81106070
m_type: -13108 m_flags: c3cc<M_EOR,M_EXTWR,M_LOOP,M_BCAST,M_MCAST,M_COMP,M_LIN
K0>
m_next: 0x1d3b4c241c334c5d m_nextpkt: 0xcccc117400fdffac
m_data: 0xcccccccccccccccc m_len: 3435973836
m_dat: 0xffffffff81106090 m_pktdat: 0xffffffff811060e0
ddb{0}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
80810 268029 1 0 3 0x100083 ttyin ksh
90433 180245 1 0 3 0x100098 poll cron
92176 8747 42701 95 3 0x100092 kqread smtpd
83989 36449 42701 103 3 0x100092 kqread smtpd
82331 325465 42701 95 3 0x100092 kqread smtpd
28903 422168 42701 95 3 0x100092 kqread smtpd
5781 10814 42701 95 3 0x100092 kqread smtpd
70395 59130 42701 95 3 0x100092 kqread smtpd
42701 329509 1 0 3 0x100080 kqread smtpd
47357 342015 1 0 3 0x80 select sshd
91828 134960 1 0 3 0x100080 poll ntpd
42264 230567 72970 83 3 0x100092 poll ntpd
72970 150962 1 83 3 0x100092 poll ntpd
23464 496327 92870 73 3 0x100090 kqread syslogd
92870 181949 1 0 3 0x100082 netio syslogd
15150 95273 0 0 3 0x14200 bored smr
99929 188193 0 0 3 0x14200 pgzero zerothread
86179 199281 0 0 3 0x14200 aiodoned aiodoned
11987 220908 0 0 3 0x14200 syncer update
50840 261844 0 0 3 0x14200 cleaner cleaner
30838 273558 0 0 3 0x14200 reaper reaper
93337 216579 0 0 3 0x14200 pgdaemon pagedaemon
89554 248076 0 0 3 0x14200 bored crynlk
45374 201214 0 0 3 0x14200 bored crypto
18771 170189 0 0 3 0x14200 usbtsk usbtask
45842 460181 0 0 3 0x14200 usbatsk usbatsk
12046 94232 0 0 3 0x40014200 acpi0 acpi0
70821 289069 0 0 7 0x40014200 idle5
76276 222595 0 0 7 0x40014200 idle4
12971 140346 0 0 7 0x40014200 idle3
9815 330346 0 0 3 0x40014200 idle2
91666 310911 0 0 3 0x40014200 idle1
67977 81742 0 0 3 0x14200 bored sensors
*50215 81595 0 0 3 0x14200 pf_lock softnet
89309 266931 0 0 3 0x14200 bored softnet
95266 11538 0 0 7 0x14200 softnet
4172 239565 0 0 7 0x14200 softnet
52909 134576 0 0 3 0x14200 bored systqmp
59216 329148 0 0 3 0x14200 bored systq
56819 222198 0 0 3 0x40014200 bored softclock
65568 50797 0 0 3 0x40014200 idle0
1 249632 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{0}>
ddb{0}> trace /t 0t81595
end trace frame: 0x0, count: -1
ddb{0}>
ddb{0}> show pool
panic: kernel diagnostic assertion "__mp_lock_held(&sched_lock, curcpu()) == 0"
failed: file "/sys/kern/kern_lock.
c", line 63
Stopped at db_enter+0x10: popq %rbp
db_enter() at db_enter+0x10
panic(ffffffff81e0198a) at panic+0x12a
__assert(ffffffff81e6f4ee,ffffffff81e3a3a4,3f,ffffffff81ea0c42) at __assert+0x2b
_kernel_lock() at _kernel_lock+0x63
kpageflttrap(ffff8000238771a0,13f73) at kpageflttrap+0x178
kerntrap(ffff8000238771a0) at kerntrap+0x91
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
pool_print1(13ebb,ffff8000238772b0,ffffffff810b92e0) at pool_print1+0x45
db_command(ffffffff821d7ea0,ffffffff82109200) at db_command+0x2ae
db_command_loop() at db_command_loop+0xda
db_trap(1,0) at db_trap+0x125
db_ktrap(1,0,ffff8000238774f0) at db_ktrap+0x112
kerntrap(ffff8000238774f0) at kerntrap+0xa2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
end trace frame: 0xffff8000238775a0, count: 0
pf and veb
r620-1# ifconfig veb0 link1
r620-1# panic: kernel diagnostic assertion
"curcpu()->ci_schedstate.spc_smrdepth == 0" failed: file "/sys/kern/sub
r_xxx.c", line 163
Stopped at db_enter+0x10: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
44883 87268 0 0x14000 0x200 3 softnet
482018 75692 0 0x14000 0x200 4 softnet
db_enter() at db_enter+0x10
panic(ffffffff81e057d2) at panic+0x12a
__assert(ffffffff81e70afd,ffffffff81e99706,a3,ffffffff81eac198) at __assert+0x2b
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x40
sleep_finish(ffff800023870d60,1) at sleep_finish+0x11c
rw_enter(ffffffff8212c8a8,1) at rw_enter+0x229
pf_test(2,1,ffff800000082048,ffff800023870f98) at pf_test+0x1055
veb_pf(ffff800000082048,1,fffffd80a6736800) at veb_pf+0x8e
veb_port_input(ffff800000082048,fffffd80a6736800,90e2ba1d4f89,ffff80000016a100)
at veb_port_input+0x2d1
ether_input(ffff800000082048,fffffd80a6736800) at ether_input+0xf5
if_input_process(ffff800000082048,ffff800023871138) at if_input_process+0x6f
ifiq_process(ffff800000086300) at ifiq_process+0x69
taskq_thread(ffff800000030200) at taskq_thread+0x9f
end trace frame: 0x0, count: 1
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{2}>
ddb{2}> show panic
kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth == 0" failed:
file "/sys/kern/subr_xxx.c", line 163
ddb{2}>
ddb{2}> show locks
shared rwlock netlock r = 0 (0xffffffff821397c0)
#0 witness_lock+0x339
#1 if_input_process+0x43
#2 ifiq_process+0x69
#3 taskq_thread+0x9f
#4 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030270)
#0 witness_lock+0x339
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
exclusive sched_lock &sched_lock r = 0 (0xffffffff82307f98)
#0 witness_lock+0x339
#1 sleep_setup+0xa5
#2 rw_enter+0x208
#3 pf_test+0x1055
#4 veb_pf+0x8e
#5 veb_port_input+0x2d1
#6 ether_input+0xf5
#7 if_input_process+0x6f
#8 ifiq_process+0x69
#9 taskq_thread+0x9f
#10 proc_trampoline+0x1c
ddb{2}> show reg
rdi 0xffffffff82128dc8 kprintf_mutex
rsi 0x5
rbp 0xffff800023870bc0
rbx 0xffff800023870bd0
rdx 0x8000000000000000
rcx 0x282
rax 0x1
r8 0xffff800023870b80
r9 0
r10 0
r11 0xb8628b3a64165262
r12 0x3000000008
r13 0xffff800023870c70
r14 0x100
r15 0xffffffff81e057d2 cmd0646_9_tim_udma+0x2ac08
rip 0xffffffff8188f150 db_enter+0x10
cs 0x8
rflags 0x282
rsp 0xffff800023870bc0
ss 0x10
db_enter+0x10: popq %rbp
ddb{2}> show mbuf
mbuf 0xffffffff8188f150
m_type: -13108 m_flags: c3cc<M_EOR,M_EXTWR,M_LOOP,M_BCAST,M_MCAST,M_COMP,M_LIN
K0>
m_next: 0x1d3b4c241c334c5d m_nextpkt: 0xcccc117400856724
m_data: 0xcccccccccccccccc m_len: 3435973836
m_dat: 0xffffffff8188f170 m_pktdat: 0xffffffff8188f1c0
ddb{2}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
31286 254508 1 0 3 0x100083 ttyin ksh
15221 364141 1 0 3 0x100098 poll cron
32441 483454 91695 95 3 0x100092 kqread smtpd
88914 211051 91695 103 3 0x100092 kqread smtpd
33036 9518 91695 95 3 0x100092 kqread smtpd
10355 12672 91695 95 3 0x100092 kqread smtpd
33653 363607 91695 95 3 0x100092 kqread smtpd
8106 144806 91695 95 3 0x100092 kqread smtpd
91695 69653 1 0 3 0x100080 kqread smtpd
56202 100526 1 0 3 0x80 select sshd
82578 119120 1 0 3 0x100080 poll ntpd
22577 323503 58450 83 3 0x100092 poll ntpd
58450 446242 1 83 3 0x100092 poll ntpd
76484 382256 47754 73 3 0x100090 kqread syslogd
47754 345348 1 0 3 0x100082 netio syslogd
15131 21719 0 0 3 0x14200 bored smr
78001 235336 0 0 3 0x14200 pgzero zerothread
83331 409074 0 0 3 0x14200 aiodoned aiodoned
6987 397384 0 0 3 0x14200 syncer update
64124 18420 0 0 3 0x14200 cleaner cleaner
48935 144264 0 0 3 0x14200 reaper reaper
28 358555 0 0 3 0x14200 pgdaemon pagedaemon
6569 456943 0 0 3 0x14200 bored crynlk
33534 153083 0 0 3 0x14200 bored crypto
28957 224669 0 0 3 0x14200 usbtsk usbtask
52327 430732 0 0 3 0x14200 usbatsk usbatsk
61211 131485 0 0 3 0x40014200 acpi0 acpi0
30473 493700 0 0 7 0x40014200 idle5
31700 240358 0 0 3 0x40014200 idle4
84549 195512 0 0 3 0x40014200 idle3
80678 29514 0 0 3 0x40014200 idle2
81368 239014 0 0 7 0x40014200 idle1
49779 300141 0 0 3 0x14200 bored sensors
87268 44883 0 0 7 0x14200 softnet
*34012 176595 0 0 3 0x14200 pf_lock softnet
61748 243827 0 0 2 0x14200 softnet
75692 482018 0 0 7 0x14200 softnet
59516 283252 0 0 3 0x14200 bored systqmp
62531 29302 0 0 3 0x14200 bored systq
59010 303115 0 0 3 0x40014200 bored softclock
42327 234827 0 0 7 0x40014200 idle0
1 390641 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{2}> trace /t 0t176595
end trace frame: 0x0, count: -1
ddb{2}>
ddb{2}> show pool
panic: kernel diagnostic assertion "__mp_lock_held(&sched_lock, curcpu()) == 0"
failed: file "/sys/kern/kern_lock.c", line 63
Stopped at db_enter+0x10: popq %rbp
db_enter() at db_enter+0x10
panic(ffffffff81e057d2) at panic+0x12a
__assert(ffffffff81e70afd,ffffffff81e3e35f,3f,ffffffff81ea23aa) at __assert+0x2b
_kernel_lock() at _kernel_lock+0x63
kpageflttrap(ffff8000238707c0,2b28b) at kpageflttrap+0x178
kerntrap(ffff8000238707c0) at kerntrap+0x91
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
pool_print1(2b1d3,ffff8000238708d0,ffffffff816646a0) at pool_print1+0x45
db_command(ffffffff821e6cc0,ffffffff8212f260) at db_command+0x2ae
db_command_loop() at db_command_loop+0xda
db_trap(1,0) at db_trap+0x125
db_ktrap(1,0,ffff800023870b10) at db_ktrap+0x112
kerntrap(ffff800023870b10) at kerntrap+0xa2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
end trace frame: 0xffff800023870bc0, count: 0