On 27.11.2022. 9:28, Hrvoje Popovski wrote: > On 27.11.2022. 1:51, Alexandr Nedvedicky wrote: >> Hello, >> >> On Sat, Nov 26, 2022 at 08:33:28PM +0100, Hrvoje Popovski wrote: >> </snip> >>> I just need to say that with all pf, pfsync and with pf_purge diffs >>> after hackathon + this diff on tech@ >>> https://www.mail-archive.com/tech@openbsd.org/msg72582.html >>> my production firewall seems stable and it wasn't without that diff >> this diff still waits for OK. it makes pfsync use >> the state mutex to safely dereference keys. >> >>> I'm not sure if we have the same diffs but even Josmar Pierri on bugs@ >>> https://www.mail-archive.com/bugs@openbsd.org/msg18994.html >>> who had panics quite regularly with that diff on tech@ seems to have >>> a stable firewall now. >>> >>> >>> >>> r620-1# uvm_fault(0xffffffff82374288, 0x17, 0, 2) -> e >>> kernel: page fault trap, code=0 >>> Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) >>> TID PID UID PRFLAGS PFLAGS CPU COMMAND >>> *192892 19920 0 0x14000 0x200 5K softnet >>> pfsync_q_del(fffffd82e8a4ce20) at pfsync_q_del+0x96 >>> pf_remove_state(fffffd82e8a4ce20) at pf_remove_state+0x14b >>> pfsync_in_del_c(fffffd8006d843b8,c,79,0) at pfsync_in_del_c+0x6f >>> pfsync_input(ffff800022d60ad8,ffff800022d60ae4,f0,2) at pfsync_input+0x33c >>> ip_deliver(ffff800022d60ad8,ffff800022d60ae4,f0,2) at ip_deliver+0x113 >>> ipintr() at ipintr+0x69 >>> if_netisr(0) at if_netisr+0xea >>> taskq_thread(ffff800000030000) at taskq_thread+0x100 >>> end trace frame: 0x0, count: 7 >>> https://www.openbsd.org/ddb.html describes the minimum info required in >>> bug reports. Insufficient info makes it difficult to find and fix bugs. >>> ddb{5}> >>> >> those panics are causing me headaches. this most likely got uncovered >> by the diff which adds a mutex. The mutex makes pfsync stable enough >> so you can trigger unknown bugs. > > Hi, > > here's a panic with WITNESS. Now I will try to trigger a panic with that > mutex diff on tech@
Hi, here's panic with WITNESS and this diff on tech@ https://www.mail-archive.com/tech@openbsd.org/msg72582.html I will stop now because I'm not sure what I'm doing and which diffs I'm testing... r620-1# uvm_fault(0xffffffff8248ea28, 0x17, 0, 2) -> e kernel: page fault trap, code=0 Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) TID PID UID PRFLAGS PFLAGS CPU COMMAND *300703 35643 0 0x14000 0x200 1K systq 237790 10061 0 0x14000 0x40000200 0 softclock pfsync_q_del(fffffd8323dc3900) at pfsync_q_del+0x96 pfsync_delete_state(fffffd8323dc3900) at pfsync_delete_state+0x118 pf_remove_state(fffffd8323dc3900) at pf_remove_state+0x14e pf_purge_expired_states(c3501) at pf_purge_expired_states+0x1b3 pf_purge(ffffffff823ae080) at pf_purge+0x28 taskq_thread(ffffffff822cbe30) at taskq_thread+0x11a end trace frame: 0x0, count: 9 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{1}> ddb{1}> show panic *cpu1: uvm_fault(0xffffffff8248ea28, 0x17, 0, 2) -> e ddb{1}> ddb{1}> show reg rdi 0x9 rsi 0xf rbp 0xffff800022d593c0 rbx 0xfffffd83347714a8 rdx 0xffffffffffffffff rcx 0x10 rax 0xf r8 0xffff7fffffffffff r9 0xffff800022d59570 r10 0xcc7e29c6fd100f64 r11 0x1e575244acf63fd3 r12 0xffff8000008c4000 r13 0xfffffd8318aac200 r14 0xfffffd8323dc3900 r15 0xffff8000008c47e0 rip 0xffffffff817d3ec6 pfsync_q_del+0x96 cs 0x8 rflags 0x10286 __ALIGN_SIZE+0xf286 rsp 0xffff800022d59390 ss 0x10 pfsync_q_del+0x96: movq %rdx,0x8(%rax) ddb{1}> ddb{1}> show locks exclusive rwlock pf_state_lock r = 0 (0xffffffff822b03a0) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x17f #2 pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock pf_lock r = 0 (0xffffffff822b0370) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x173 #2 pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock pfstates r = 0 (0xffffffff822c57d0) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x167 #2 
pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock netlock r = 0 (0xffffffff822b2590) #0 witness_lock+0x311 #1 rw_enter+0x292 #2 pf_purge_expired_states+0x15b #3 pf_purge+0x28 #4 taskq_thread+0x11a #5 proc_trampoline+0x1c exclusive kernel_lock &kernel_lock r = 1 (0xffffffff824be1f8) #0 witness_lock+0x311 #1 __mp_acquire_count+0x38 #2 mi_switch+0x28b #3 sleep_finish+0xfe #4 rw_enter+0x232 #5 pf_purge_expired_states+0x15b #6 pf_purge+0x28 #7 taskq_thread+0x11a #8 proc_trampoline+0x1c shared rwlock systq r = 0 (0xffffffff822cbea0) #0 witness_lock+0x311 #1 taskq_thread+0x10d #2 proc_trampoline+0x1c exclusive mutex &sc->sc_st_mtx r = 0 (0xffff8000008c47f0) #0 witness_lock+0x311 #1 mtx_enter_try+0x95 #2 mtx_enter+0x48 #3 pfsync_q_del+0x34 #4 pfsync_delete_state+0x118 #5 pf_remove_state+0x14e #6 pf_purge_expired_states+0x1b3 #7 pf_purge+0x28 #8 taskq_thread+0x11a #9 proc_trampoline+0x1c ddb{1}> ddb{1}> ps PID TID PPID UID S FLAGS WAIT COMMAND 72924 135192 1 0 3 0x100083 ttyin ksh 6737 208915 1 0 3 0x100098 kqread cron 62620 522684 84978 95 3 0x1100092 kqread smtpd 47849 455145 84978 103 3 0x1100092 kqread smtpd 80040 121376 84978 95 3 0x1100092 kqread smtpd 98360 76825 84978 95 3 0x100092 kqread smtpd 85092 274987 84978 95 3 0x1100092 kqread smtpd 80411 491082 84978 95 3 0x1100092 kqread smtpd 84978 249291 1 0 3 0x100080 kqread smtpd 69234 417371 1 0 3 0x88 kqread sshd 1291 346731 1 0 3 0x100080 kqread ntpd 3267 116907 24468 83 3 0x100092 kqread ntpd 24468 659 1 83 3 0x1100092 kqread ntpd 44721 123762 83111 74 3 0x1100092 bpf pflogd 83111 335808 1 0 3 0x80 netio pflogd 7033 158871 35929 73 3 0x1100090 kqread syslogd 35929 340682 1 0 3 0x100082 netio syslogd 58344 299309 0 0 3 0x14200 bored smr 69972 211353 0 0 3 0x14200 pgzero zerothread 74312 135454 0 0 3 0x14200 aiodoned aiodoned 17451 407422 0 0 3 0x14200 syncer update 15319 137293 0 0 3 0x14200 cleaner cleaner 60332 100692 0 0 3 0x14200 reaper reaper 83325 243858 0 0 3 0x14200 pgdaemon 
pagedaemon 70773 507534 0 0 3 0x14200 usbtsk usbtask 93746 490724 0 0 3 0x14200 usbatsk usbatsk 78460 157144 0 0 3 0x40014200 acpi0 acpi0 79066 206654 0 0 7 0x40014200 idle5 4481 189765 0 0 7 0x40014200 idle4 68671 435094 0 0 7 0x40014200 idle3 68275 325163 0 0 7 0x40014200 idle2 32431 75656 0 0 3 0x40014200 idle1 57463 232938 0 0 3 0x14200 bored sensors 59000 506424 0 0 3 0x14200 netlock softnet 93398 127100 0 0 3 0x14200 netlock softnet 48784 175058 0 0 3 0x14200 netlock softnet 80823 498503 0 0 3 0x14200 netlock softnet 59077 478262 0 0 3 0x14200 netlock softnet 31150 149748 0 0 3 0x14200 netlock softnet 44931 352078 0 0 3 0x14200 bored systqmp *35643 300703 0 0 7 0x14200 systq 10061 237790 0 0 7 0x40014200 softclock 62030 203604 0 0 3 0x40014200 idle0 1 404603 0 0 3 0x82 wait init 0 0 -1 0 3 0x10200 scheduler swapper ddb{1}> ddb{1}> ps /o TID PID UID PRFLAGS PFLAGS CPU COMMAND *300703 35643 0 0x14000 0x200 1K systq 237790 10061 0 0x14000 0x40000200 0 softclock ddb{1}> trace /t 0t300703 ffff8000008c47e0(1,1000000000000,ff050000000000,0,ffffffff00000000,1bc000001bc) at 0xffff8000008c47e0 end of kernel end trace frame: 0x2cf8ab0045348363, count: -1 ddb{1}> trace /t 0t237790 sleep_finish(ffff800022d53150,1) at sleep_finish+0xfe rw_enter(ffffffff822b2580,1) at rw_enter+0x232 nd6_timer(0) at nd6_timer+0x29 timeout_run(ffffffff823ad658) at timeout_run+0x93 softclock_thread(ffff8000fffff260) at softclock_thread+0x11d end trace frame: 0x0, count: -5 ddb{1}> ddb{1}> mach ddbcpu 0 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffffffff822c3ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 __mp_lock(ffffffff824bdff0) at __mp_lock+0xb3 __mp_acquire_count(ffffffff824bdff0,1) at __mp_acquire_count+0x38 mi_switch() at mi_switch+0x28b sleep_finish(ffff800022d53150,1) at sleep_finish+0xfe rw_enter(ffffffff822b2580,1) at rw_enter+0x232 nd6_timer(0) at nd6_timer+0x29 timeout_run(ffffffff823ad658) at timeout_run+0x93 
softclock_thread(ffff8000fffff260) at softclock_thread+0x11d end trace frame: 0x0, count: 4 ddb{0}> mach ddbcpu 1 Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) pfsync_q_del(fffffd8323dc3900) at pfsync_q_del+0x96 pfsync_delete_state(fffffd8323dc3900) at pfsync_delete_state+0x118 pf_remove_state(fffffd8323dc3900) at pf_remove_state+0x14e pf_purge_expired_states(c3501) at pf_purge_expired_states+0x1b3 pf_purge(ffffffff823ae080) at pf_purge+0x28 taskq_thread(ffffffff822cbe30) at taskq_thread+0x11a end trace frame: 0x0, count: 9 ddb{1}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022512ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff800022512ff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{2}> mach ddbcpu 3 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff80002251bff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff80002251bff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{3}> mach ddbcpu 4 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022524ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff800022524ff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{4}> mach ddbcpu 5 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff80002252dff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff80002252dff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{5}>