On 27.11.2022. 9:28, Hrvoje Popovski wrote:
> On 27.11.2022. 1:51, Alexandr Nedvedicky wrote:
>> Hello,
>>
>> On Sat, Nov 26, 2022 at 08:33:28PM +0100, Hrvoje Popovski wrote:
>> </snip>
>>> I just need to say that with all the pf, pfsync and pf_purge diffs
>>> from after the hackathon + this diff on tech@
>>> https://www.mail-archive.com/tech@openbsd.org/msg72582.html
>>> my production firewall seems stable, and it wasn't without that diff.
>>     this diff is still waiting for an OK. It makes pfsync use the
>>     state mutex to safely dereference keys.
>>
>>> I'm not sure if we have the same diffs, but even Josmar Pierri on bugs@
>>> https://www.mail-archive.com/bugs@openbsd.org/msg18994.html
>>> who had panics quite regularly, seems to have a stable firewall now
>>> with that diff on tech@.
>>>
>>>
>>>
>>> r620-1# uvm_fault(0xffffffff82374288, 0x17, 0, 2) -> e
>>> kernel: page fault trap, code=0
>>> Stopped at      pfsync_q_del+0x96:      movq    %rdx,0x8(%rax)
>>>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>>> *192892  19920      0     0x14000      0x200    5K softnet
>>> pfsync_q_del(fffffd82e8a4ce20) at pfsync_q_del+0x96
>>> pf_remove_state(fffffd82e8a4ce20) at pf_remove_state+0x14b
>>> pfsync_in_del_c(fffffd8006d843b8,c,79,0) at pfsync_in_del_c+0x6f
>>> pfsync_input(ffff800022d60ad8,ffff800022d60ae4,f0,2) at pfsync_input+0x33c
>>> ip_deliver(ffff800022d60ad8,ffff800022d60ae4,f0,2) at ip_deliver+0x113
>>> ipintr() at ipintr+0x69
>>> if_netisr(0) at if_netisr+0xea
>>> taskq_thread(ffff800000030000) at taskq_thread+0x100
>>> end trace frame: 0x0, count: 7
>>> https://www.openbsd.org/ddb.html describes the minimum info required in
>>> bug reports.  Insufficient info makes it difficult to find and fix bugs.
>>> ddb{5}>
>>>
>>     those panics are causing me headaches. This most likely got uncovered
>>     by the diff which adds a mutex. The mutex makes pfsync stable enough
>>     that you can now trigger previously unknown bugs.
> 
> Hi,
> 
> here's a panic with WITNESS. Now I will try to trigger the panic with that
> mutex diff on tech@


Hi,

here's a panic with WITNESS and this diff on tech@
https://www.mail-archive.com/tech@openbsd.org/msg72582.html

I will stop now because I'm not sure what I'm doing and which diffs I'm
testing...


r620-1# uvm_fault(0xffffffff8248ea28, 0x17, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at      pfsync_q_del+0x96:      movq    %rdx,0x8(%rax)
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*300703  35643      0     0x14000      0x200    1K systq
 237790  10061      0     0x14000 0x40000200    0  softclock
pfsync_q_del(fffffd8323dc3900) at pfsync_q_del+0x96
pfsync_delete_state(fffffd8323dc3900) at pfsync_delete_state+0x118
pf_remove_state(fffffd8323dc3900) at pf_remove_state+0x14e
pf_purge_expired_states(c3501) at pf_purge_expired_states+0x1b3
pf_purge(ffffffff823ae080) at pf_purge+0x28
taskq_thread(ffffffff822cbe30) at taskq_thread+0x11a
end trace frame: 0x0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{1}>


ddb{1}> show panic
*cpu1: uvm_fault(0xffffffff8248ea28, 0x17, 0, 2) -> e
ddb{1}>


ddb{1}> show reg
rdi                              0x9
rsi                              0xf
rbp               0xffff800022d593c0
rbx               0xfffffd83347714a8
rdx               0xffffffffffffffff
rcx                             0x10
rax                              0xf
r8                0xffff7fffffffffff
r9                0xffff800022d59570
r10               0xcc7e29c6fd100f64
r11               0x1e575244acf63fd3
r12               0xffff8000008c4000
r13               0xfffffd8318aac200
r14               0xfffffd8323dc3900
r15               0xffff8000008c47e0
rip               0xffffffff817d3ec6    pfsync_q_del+0x96
cs                               0x8
rflags                       0x10286    __ALIGN_SIZE+0xf286
rsp               0xffff800022d59390
ss                              0x10
pfsync_q_del+0x96:      movq    %rdx,0x8(%rax)
ddb{1}>



ddb{1}>  show locks
exclusive rwlock pf_state_lock r = 0 (0xffffffff822b03a0)
#0  witness_lock+0x311
#1  pf_purge_expired_states+0x17f
#2  pf_purge+0x28
#3  taskq_thread+0x11a
#4  proc_trampoline+0x1c
exclusive rwlock pf_lock r = 0 (0xffffffff822b0370)
#0  witness_lock+0x311
#1  pf_purge_expired_states+0x173
#2  pf_purge+0x28
#3  taskq_thread+0x11a
#4  proc_trampoline+0x1c
exclusive rwlock pfstates r = 0 (0xffffffff822c57d0)
#0  witness_lock+0x311
#1  pf_purge_expired_states+0x167
#2  pf_purge+0x28
#3  taskq_thread+0x11a
#4  proc_trampoline+0x1c
exclusive rwlock netlock r = 0 (0xffffffff822b2590)
#0  witness_lock+0x311
#1  rw_enter+0x292
#2  pf_purge_expired_states+0x15b
#3  pf_purge+0x28
#4  taskq_thread+0x11a
#5  proc_trampoline+0x1c
exclusive kernel_lock &kernel_lock r = 1 (0xffffffff824be1f8)
#0  witness_lock+0x311
#1  __mp_acquire_count+0x38
#2  mi_switch+0x28b
#3  sleep_finish+0xfe
#4  rw_enter+0x232
#5  pf_purge_expired_states+0x15b
#6  pf_purge+0x28
#7  taskq_thread+0x11a
#8  proc_trampoline+0x1c
shared rwlock systq r = 0 (0xffffffff822cbea0)
#0  witness_lock+0x311
#1  taskq_thread+0x10d
#2  proc_trampoline+0x1c
exclusive mutex &sc->sc_st_mtx r = 0 (0xffff8000008c47f0)
#0  witness_lock+0x311
#1  mtx_enter_try+0x95
#2  mtx_enter+0x48
#3  pfsync_q_del+0x34
#4  pfsync_delete_state+0x118
#5  pf_remove_state+0x14e
#6  pf_purge_expired_states+0x1b3
#7  pf_purge+0x28
#8  taskq_thread+0x11a
#9  proc_trampoline+0x1c
ddb{1}>


ddb{1}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 72924  135192      1      0  3    0x100083  ttyin         ksh
  6737  208915      1      0  3    0x100098  kqread        cron
 62620  522684  84978     95  3   0x1100092  kqread        smtpd
 47849  455145  84978    103  3   0x1100092  kqread        smtpd
 80040  121376  84978     95  3   0x1100092  kqread        smtpd
 98360   76825  84978     95  3    0x100092  kqread        smtpd
 85092  274987  84978     95  3   0x1100092  kqread        smtpd
 80411  491082  84978     95  3   0x1100092  kqread        smtpd
 84978  249291      1      0  3    0x100080  kqread        smtpd
 69234  417371      1      0  3        0x88  kqread        sshd
  1291  346731      1      0  3    0x100080  kqread        ntpd
  3267  116907  24468     83  3    0x100092  kqread        ntpd
 24468     659      1     83  3   0x1100092  kqread        ntpd
 44721  123762  83111     74  3   0x1100092  bpf           pflogd
 83111  335808      1      0  3        0x80  netio         pflogd
  7033  158871  35929     73  3   0x1100090  kqread        syslogd
 35929  340682      1      0  3    0x100082  netio         syslogd
 58344  299309      0      0  3     0x14200  bored         smr
 69972  211353      0      0  3     0x14200  pgzero        zerothread
 74312  135454      0      0  3     0x14200  aiodoned      aiodoned
 17451  407422      0      0  3     0x14200  syncer        update
 15319  137293      0      0  3     0x14200  cleaner       cleaner
 60332  100692      0      0  3     0x14200  reaper        reaper
 83325  243858      0      0  3     0x14200  pgdaemon      pagedaemon
 70773  507534      0      0  3     0x14200  usbtsk        usbtask
 93746  490724      0      0  3     0x14200  usbatsk       usbatsk
 78460  157144      0      0  3  0x40014200  acpi0         acpi0
 79066  206654      0      0  7  0x40014200                idle5
  4481  189765      0      0  7  0x40014200                idle4
 68671  435094      0      0  7  0x40014200                idle3
 68275  325163      0      0  7  0x40014200                idle2
 32431   75656      0      0  3  0x40014200                idle1
 57463  232938      0      0  3     0x14200  bored         sensors
 59000  506424      0      0  3     0x14200  netlock       softnet
 93398  127100      0      0  3     0x14200  netlock       softnet
 48784  175058      0      0  3     0x14200  netlock       softnet
 80823  498503      0      0  3     0x14200  netlock       softnet
 59077  478262      0      0  3     0x14200  netlock       softnet
 31150  149748      0      0  3     0x14200  netlock       softnet
 44931  352078      0      0  3     0x14200  bored         systqmp
*35643  300703      0      0  7     0x14200                systq
 10061  237790      0      0  7  0x40014200                softclock
 62030  203604      0      0  3  0x40014200                idle0
     1  404603      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{1}>


ddb{1}> ps /o
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*300703  35643      0     0x14000      0x200    1K systq
 237790  10061      0     0x14000 0x40000200    0  softclock


ddb{1}> trace /t 0t300703
ffff8000008c47e0(1,1000000000000,ff050000000000,0,ffffffff00000000,1bc000001bc)
 at 0xffff8000008c47e0
end of kernel
end trace frame: 0x2cf8ab0045348363, count: -1

ddb{1}> trace /t 0t237790
sleep_finish(ffff800022d53150,1) at sleep_finish+0xfe
rw_enter(ffffffff822b2580,1) at rw_enter+0x232
nd6_timer(0) at nd6_timer+0x29
timeout_run(ffffffff823ad658) at timeout_run+0x93
softclock_thread(ffff8000fffff260) at softclock_thread+0x11d
end trace frame: 0x0, count: -5
ddb{1}>



ddb{1}> mach ddbcpu 0
Stopped at      x86_ipi_db+0x12:        leave
x86_ipi_db(ffffffff822c3ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
__mp_lock(ffffffff824bdff0) at __mp_lock+0xb3
__mp_acquire_count(ffffffff824bdff0,1) at __mp_acquire_count+0x38
mi_switch() at mi_switch+0x28b
sleep_finish(ffff800022d53150,1) at sleep_finish+0xfe
rw_enter(ffffffff822b2580,1) at rw_enter+0x232
nd6_timer(0) at nd6_timer+0x29
timeout_run(ffffffff823ad658) at timeout_run+0x93
softclock_thread(ffff8000fffff260) at softclock_thread+0x11d
end trace frame: 0x0, count: 4


ddb{0}> mach ddbcpu 1
Stopped at      pfsync_q_del+0x96:      movq    %rdx,0x8(%rax)
pfsync_q_del(fffffd8323dc3900) at pfsync_q_del+0x96
pfsync_delete_state(fffffd8323dc3900) at pfsync_delete_state+0x118
pf_remove_state(fffffd8323dc3900) at pf_remove_state+0x14e
pf_purge_expired_states(c3501) at pf_purge_expired_states+0x1b3
pf_purge(ffffffff823ae080) at pf_purge+0x28
taskq_thread(ffffffff822cbe30) at taskq_thread+0x11a
end trace frame: 0x0, count: 9


ddb{1}> mach ddbcpu 2
Stopped at      x86_ipi_db+0x12:        leave
x86_ipi_db(ffff800022512ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff800022512ff0) at sched_idle+0x280
end trace frame: 0x0, count: 10


ddb{2}> mach ddbcpu 3
Stopped at      x86_ipi_db+0x12:        leave
x86_ipi_db(ffff80002251bff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff80002251bff0) at sched_idle+0x280
end trace frame: 0x0, count: 10


ddb{3}> mach ddbcpu 4
Stopped at      x86_ipi_db+0x12:        leave
x86_ipi_db(ffff800022524ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff800022524ff0) at sched_idle+0x280
end trace frame: 0x0, count: 10


ddb{4}> mach ddbcpu 5
Stopped at      x86_ipi_db+0x12:        leave
x86_ipi_db(ffff80002252dff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff80002252dff0) at sched_idle+0x280
end trace frame: 0x0, count: 10
ddb{5}>

Reply via email to