On 17.5.2021. 16:24, Alexandr Nedvedicky wrote:
> Hrvoje,
> 
> managed to trigger diagnostic panics with diff [1] sent by bluhm@
> some time ago. The panic Hrvoje sees comes from ether_input() here:

> [1] https://marc.info/?l=openbsd-tech&m=161903387904923&w=2
> 
> [2] https://marc.info/?l=openbsd-tech&m=162099270106067&w=2

for those interested, I sent emails to a couple of developers with the
following panics

c/p

Hi guys,

i'm testing bluhm@ parallel diff with pf and tpmr/veb/bridge.
if i'm sending traffic over tmpr or veb, the same second i enable pf,
box panic.. see attachment ..

with bridge i can't panic box, but it seems that performance is even
worse than without parallel diff ... i will play with it more, maybe
it's just some glitch ...
pf and tpmr

r620-1# panic: kernel diagnostic assertion 
"curcpu()->ci_schedstate.spc_smrdepth == 0" failed: file "/sys/kern/sub
r_xxx.c", line 163
Stopped at      db_enter+0x10:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  11538  95266      0     0x14000      0x200    2  softnet
 239565   4172      0     0x14000      0x200    1  softnet
db_enter() at db_enter+0x10
panic(ffffffff81e0198a) at panic+0x12a
__assert(ffffffff81e6f4ee,ffffffff81e93713,a3,ffffffff81ea877c) at __assert+0x2b
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x40
sleep_finish(ffff800023877740,1) at sleep_finish+0x11c
rw_enter(ffffffff8212e900,1) at rw_enter+0x229
pf_test(2,1,ffff800000082048,ffff800023877978) at pf_test+0x1055
tpmr_pf(ffff800000082048,1,fffffd80cccb8c00) at tpmr_pf+0x7e
tpmr_input(ffff800000082048,fffffd80cccb8c00,90e2ba1d4f89,ffff800000680f00) at 
tpmr_input+0x124
ether_input(ffff800000082048,fffffd80cccb8c00) at ether_input+0xf5
if_input_process(ffff800000082048,ffff800023877af8) at if_input_process+0x6f
ifiq_process(ffff800000086000) at ifiq_process+0x69
taskq_thread(ffff800000030300) at taskq_thread+0x9f
end trace frame: 0x0, count: 1
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}>


ddb{0}> show panic
kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth == 0" failed:
 file "/sys/kern/subr_xxx.c", line 163


ddb{0}> show locks
shared rwlock netlock r = 0 (0xffffffff8212eab0)
#0  witness_lock+0x339
#1  if_input_process+0x43
#2  ifiq_process+0x69
#3  taskq_thread+0x9f
#4  proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030370)
#0  witness_lock+0x339
#1  taskq_thread+0x92
#2  proc_trampoline+0x1c
exclusive sched_lock &sched_lock r = 0 (0xffffffff8230a100)
#0  witness_lock+0x339
#1  sleep_setup+0xa5
#2  rw_enter+0x208
#3  pf_test+0x1055
#4  tpmr_pf+0x7e
#5  tpmr_input+0x124
#6  ether_input+0xf5
#7  if_input_process+0x6f
#8  ifiq_process+0x69
#9  taskq_thread+0x9f
#10 proc_trampoline+0x1c


ddb{0}> show reg
rdi               0xffffffff820f1c80    kprintf_mutex
rsi                              0x5
rbp               0xffff8000238775a0
rbx               0xffff8000238775b0
rdx               0x8000000000000000
rcx                            0x206
rax                              0x1
r8                0xffff800023877560
r9                                 0
r10                                0
r11               0x80ef1bc6ac87b49d
r12                     0x3000000008
r13               0xffff800023877650
r14                            0x100
r15               0xffffffff81e0198a    cmd0646_9_tim_udma+0x24a93
rip               0xffffffff81106070    db_enter+0x10
cs                               0x8
rflags                         0x282
rsp               0xffff8000238775a0
ss                              0x10
db_enter+0x10:  popq    %rbp



ddb{0}> show mbuf
mbuf 0xffffffff81106070
m_type: -13108  m_flags: c3cc<M_EOR,M_EXTWR,M_LOOP,M_BCAST,M_MCAST,M_COMP,M_LIN
K0>
m_next: 0x1d3b4c241c334c5d      m_nextpkt: 0xcccc117400fdffac
m_data: 0xcccccccccccccccc      m_len: 3435973836
m_dat: 0xffffffff81106090       m_pktdat: 0xffffffff811060e0


ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 80810  268029      1      0  3    0x100083  ttyin         ksh
 90433  180245      1      0  3    0x100098  poll          cron
 92176    8747  42701     95  3    0x100092  kqread        smtpd
 83989   36449  42701    103  3    0x100092  kqread        smtpd
 82331  325465  42701     95  3    0x100092  kqread        smtpd
 28903  422168  42701     95  3    0x100092  kqread        smtpd
  5781   10814  42701     95  3    0x100092  kqread        smtpd
 70395   59130  42701     95  3    0x100092  kqread        smtpd
 42701  329509      1      0  3    0x100080  kqread        smtpd
 47357  342015      1      0  3        0x80  select        sshd
 91828  134960      1      0  3    0x100080  poll          ntpd
 42264  230567  72970     83  3    0x100092  poll          ntpd
 72970  150962      1     83  3    0x100092  poll          ntpd
 23464  496327  92870     73  3    0x100090  kqread        syslogd
 92870  181949      1      0  3    0x100082  netio         syslogd
 15150   95273      0      0  3     0x14200  bored         smr
 99929  188193      0      0  3     0x14200  pgzero        zerothread
 86179  199281      0      0  3     0x14200  aiodoned      aiodoned
 11987  220908      0      0  3     0x14200  syncer        update
 50840  261844      0      0  3     0x14200  cleaner       cleaner
 30838  273558      0      0  3     0x14200  reaper        reaper
 93337  216579      0      0  3     0x14200  pgdaemon      pagedaemon
 89554  248076      0      0  3     0x14200  bored         crynlk
 45374  201214      0      0  3     0x14200  bored         crypto
 18771  170189      0      0  3     0x14200  usbtsk        usbtask
 45842  460181      0      0  3     0x14200  usbatsk       usbatsk
 12046   94232      0      0  3  0x40014200  acpi0         acpi0
 70821  289069      0      0  7  0x40014200                idle5
 76276  222595      0      0  7  0x40014200                idle4
 12971  140346      0      0  7  0x40014200                idle3
  9815  330346      0      0  3  0x40014200                idle2
 91666  310911      0      0  3  0x40014200                idle1
 67977   81742      0      0  3     0x14200  bored         sensors
*50215   81595      0      0  3     0x14200  pf_lock       softnet
 89309  266931      0      0  3     0x14200  bored         softnet
 95266   11538      0      0  7     0x14200                softnet
  4172  239565      0      0  7     0x14200                softnet
 52909  134576      0      0  3     0x14200  bored         systqmp
 59216  329148      0      0  3     0x14200  bored         systq
 56819  222198      0      0  3  0x40014200  bored         softclock
 65568   50797      0      0  3  0x40014200                idle0
     1  249632      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{0}>

ddb{0}> trace /t 0t81595
end trace frame: 0x0, count: -1
ddb{0}>


ddb{0}> show pool
panic: kernel diagnostic assertion "__mp_lock_held(&sched_lock, curcpu()) == 0" 
failed: file "/sys/kern/kern_lock.
c", line 63
Stopped at      db_enter+0x10:  popq    %rbp
db_enter() at db_enter+0x10
panic(ffffffff81e0198a) at panic+0x12a
__assert(ffffffff81e6f4ee,ffffffff81e3a3a4,3f,ffffffff81ea0c42) at __assert+0x2b
_kernel_lock() at _kernel_lock+0x63
kpageflttrap(ffff8000238771a0,13f73) at kpageflttrap+0x178
kerntrap(ffff8000238771a0) at kerntrap+0x91
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
pool_print1(13ebb,ffff8000238772b0,ffffffff810b92e0) at pool_print1+0x45
db_command(ffffffff821d7ea0,ffffffff82109200) at db_command+0x2ae
db_command_loop() at db_command_loop+0xda
db_trap(1,0) at db_trap+0x125
db_ktrap(1,0,ffff8000238774f0) at db_ktrap+0x112
kerntrap(ffff8000238774f0) at kerntrap+0xa2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
end trace frame: 0xffff8000238775a0, count: 0
pf and veb

r620-1# ifconfig veb0 link1
r620-1# panic: kernel diagnostic assertion 
"curcpu()->ci_schedstate.spc_smrdepth == 0" failed: file "/sys/kern/sub
r_xxx.c", line 163
Stopped at      db_enter+0x10:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  44883  87268      0     0x14000      0x200    3  softnet
 482018  75692      0     0x14000      0x200    4  softnet
db_enter() at db_enter+0x10
panic(ffffffff81e057d2) at panic+0x12a
__assert(ffffffff81e70afd,ffffffff81e99706,a3,ffffffff81eac198) at __assert+0x2b
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x40
sleep_finish(ffff800023870d60,1) at sleep_finish+0x11c
rw_enter(ffffffff8212c8a8,1) at rw_enter+0x229
pf_test(2,1,ffff800000082048,ffff800023870f98) at pf_test+0x1055
veb_pf(ffff800000082048,1,fffffd80a6736800) at veb_pf+0x8e
veb_port_input(ffff800000082048,fffffd80a6736800,90e2ba1d4f89,ffff80000016a100)
 at veb_port_input+0x2d1
ether_input(ffff800000082048,fffffd80a6736800) at ether_input+0xf5
if_input_process(ffff800000082048,ffff800023871138) at if_input_process+0x6f
ifiq_process(ffff800000086300) at ifiq_process+0x69
taskq_thread(ffff800000030200) at taskq_thread+0x9f
end trace frame: 0x0, count: 1
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{2}>


ddb{2}> show panic
kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth == 0" failed:
 file "/sys/kern/subr_xxx.c", line 163
ddb{2}>


ddb{2}> show locks
shared rwlock netlock r = 0 (0xffffffff821397c0)
#0  witness_lock+0x339
#1  if_input_process+0x43
#2  ifiq_process+0x69
#3  taskq_thread+0x9f
#4  proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030270)
#0  witness_lock+0x339
#1  taskq_thread+0x92
#2  proc_trampoline+0x1c
exclusive sched_lock &sched_lock r = 0 (0xffffffff82307f98)
#0  witness_lock+0x339
#1  sleep_setup+0xa5
#2  rw_enter+0x208
#3  pf_test+0x1055
#4  veb_pf+0x8e
#5  veb_port_input+0x2d1
#6  ether_input+0xf5
#7  if_input_process+0x6f
#8  ifiq_process+0x69
#9  taskq_thread+0x9f
#10 proc_trampoline+0x1c



ddb{2}> show reg
rdi               0xffffffff82128dc8    kprintf_mutex
rsi                              0x5
rbp               0xffff800023870bc0
rbx               0xffff800023870bd0
rdx               0x8000000000000000
rcx                            0x282
rax                              0x1
r8                0xffff800023870b80
r9                                 0
r10                                0
r11               0xb8628b3a64165262
r12                     0x3000000008
r13               0xffff800023870c70
r14                            0x100
r15               0xffffffff81e057d2    cmd0646_9_tim_udma+0x2ac08
rip               0xffffffff8188f150    db_enter+0x10
cs                               0x8
rflags                         0x282
rsp               0xffff800023870bc0
ss                              0x10
db_enter+0x10:  popq    %rbp



ddb{2}> show mbuf
mbuf 0xffffffff8188f150
m_type: -13108  m_flags: c3cc<M_EOR,M_EXTWR,M_LOOP,M_BCAST,M_MCAST,M_COMP,M_LIN
K0>
m_next: 0x1d3b4c241c334c5d      m_nextpkt: 0xcccc117400856724
m_data: 0xcccccccccccccccc      m_len: 3435973836
m_dat: 0xffffffff8188f170       m_pktdat: 0xffffffff8188f1c0



ddb{2}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 31286  254508      1      0  3    0x100083  ttyin         ksh
 15221  364141      1      0  3    0x100098  poll          cron
 32441  483454  91695     95  3    0x100092  kqread        smtpd
 88914  211051  91695    103  3    0x100092  kqread        smtpd
 33036    9518  91695     95  3    0x100092  kqread        smtpd
 10355   12672  91695     95  3    0x100092  kqread        smtpd
 33653  363607  91695     95  3    0x100092  kqread        smtpd
  8106  144806  91695     95  3    0x100092  kqread        smtpd
 91695   69653      1      0  3    0x100080  kqread        smtpd
 56202  100526      1      0  3        0x80  select        sshd
 82578  119120      1      0  3    0x100080  poll          ntpd
 22577  323503  58450     83  3    0x100092  poll          ntpd
 58450  446242      1     83  3    0x100092  poll          ntpd
 76484  382256  47754     73  3    0x100090  kqread        syslogd
 47754  345348      1      0  3    0x100082  netio         syslogd
 15131   21719      0      0  3     0x14200  bored         smr
 78001  235336      0      0  3     0x14200  pgzero        zerothread
 83331  409074      0      0  3     0x14200  aiodoned      aiodoned
  6987  397384      0      0  3     0x14200  syncer        update
 64124   18420      0      0  3     0x14200  cleaner       cleaner
 48935  144264      0      0  3     0x14200  reaper        reaper
    28  358555      0      0  3     0x14200  pgdaemon      pagedaemon
  6569  456943      0      0  3     0x14200  bored         crynlk
 33534  153083      0      0  3     0x14200  bored         crypto
 28957  224669      0      0  3     0x14200  usbtsk        usbtask
 52327  430732      0      0  3     0x14200  usbatsk       usbatsk
 61211  131485      0      0  3  0x40014200  acpi0         acpi0
 30473  493700      0      0  7  0x40014200                idle5
 31700  240358      0      0  3  0x40014200                idle4
 84549  195512      0      0  3  0x40014200                idle3
 80678   29514      0      0  3  0x40014200                idle2
 81368  239014      0      0  7  0x40014200                idle1
 49779  300141      0      0  3     0x14200  bored         sensors
 87268   44883      0      0  7     0x14200                softnet
*34012  176595      0      0  3     0x14200  pf_lock       softnet
 61748  243827      0      0  2     0x14200                softnet
 75692  482018      0      0  7     0x14200                softnet
 59516  283252      0      0  3     0x14200  bored         systqmp
 62531   29302      0      0  3     0x14200  bored         systq
 59010  303115      0      0  3  0x40014200  bored         softclock
 42327  234827      0      0  7  0x40014200                idle0
     1  390641      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper


ddb{2}> trace /t 0t176595
end trace frame: 0x0, count: -1
ddb{2}>



ddb{2}> show pool
panic: kernel diagnostic assertion "__mp_lock_held(&sched_lock, curcpu()) == 0" 
failed: file "/sys/kern/kern_lock.c", line 63
Stopped at      db_enter+0x10:  popq    %rbp
db_enter() at db_enter+0x10
panic(ffffffff81e057d2) at panic+0x12a
__assert(ffffffff81e70afd,ffffffff81e3e35f,3f,ffffffff81ea23aa) at __assert+0x2b
_kernel_lock() at _kernel_lock+0x63
kpageflttrap(ffff8000238707c0,2b28b) at kpageflttrap+0x178
kerntrap(ffff8000238707c0) at kerntrap+0x91
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
pool_print1(2b1d3,ffff8000238708d0,ffffffff816646a0) at pool_print1+0x45
db_command(ffffffff821e6cc0,ffffffff8212f260) at db_command+0x2ae
db_command_loop() at db_command_loop+0xda
db_trap(1,0) at db_trap+0x125
db_ktrap(1,0,ffff800023870b10) at db_ktrap+0x112
kerntrap(ffff800023870b10) at kerntrap+0xa2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
end trace frame: 0xffff800023870bc0, count: 0

Reply via email to