On 27.11.2022. 1:51, Alexandr Nedvedicky wrote: > Hello, > > On Sat, Nov 26, 2022 at 08:33:28PM +0100, Hrvoje Popovski wrote: > </snip> >> I just need to say that with all pf, pfsync and with pf_purge diffs >> after hackaton + this diff on tech@ >> https://www.mail-archive.com/tech@openbsd.org/msg72582.html >> my production firewall seems stable and it wasn't without that diff > > this diff still waits for OK. it makes pfsync to use > state mutex to safely dereference keys. > >> >> I'm not sure if we have same diffs but even Josmar Pierri on bugs@ >> https://www.mail-archive.com/bugs@openbsd.org/msg18994.html >> who had panics quite regularly with that diff on tech@ seems to have >> stable firewall now. >> >> >> >> r620-1# uvm_fault(0xffffffff82374288, 0x17, 0, 2) -> e >> kernel: page fault trap, code=0 >> Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) >> TID PID UID PRFLAGS PFLAGS CPU COMMAND >> *192892 19920 0 0x14000 0x200 5K softnet >> pfsync_q_del(fffffd82e8a4ce20) at pfsync_q_del+0x96 >> pf_remove_state(fffffd82e8a4ce20) at pf_remove_state+0x14b >> pfsync_in_del_c(fffffd8006d843b8,c,79,0) at pfsync_in_del_c+0x6f >> pfsync_input(ffff800022d60ad8,ffff800022d60ae4,f0,2) at pfsync_input+0x33c >> ip_deliver(ffff800022d60ad8,ffff800022d60ae4,f0,2) at ip_deliver+0x113 >> ipintr() at ipintr+0x69 >> if_netisr(0) at if_netisr+0xea >> taskq_thread(ffff800000030000) at taskq_thread+0x100 >> end trace frame: 0x0, count: 7 >> https://www.openbsd.org/ddb.html describes the minimum info required in >> bug reports. Insufficient info makes it difficult to find and fix bugs. >> ddb{5}> >> > > those panics are causing me headaches. this got most-likely uncovered > by diff which adds a mutex. The mutex makes pfsync stable enough > so you can trigger unknown bugs.
Hi, here's panic with WITNESS. Now I will try to trigger panic with that mutex diff on tech@ r620-1# uvm_fault(0xffffffff824be2f8, 0x17, 0, 2) -> e kernel: page fault trap, code=0 Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) TID PID UID PRFLAGS PFLAGS CPU COMMAND 267891 77512 83 0x100012 0 2 ntpd *250028 94877 0 0x14000 0x200 3K systq 35323 58401 0 0x14000 0x40000200 0 softclock pfsync_q_del(fffffd82c6826da0) at pfsync_q_del+0x96 pfsync_delete_state(fffffd82c6826da0) at pfsync_delete_state+0x118 pf_remove_state(fffffd82c6826da0) at pf_remove_state+0x14b pf_purge_expired_states(b3368) at pf_purge_expired_states+0x1b3 pf_purge(ffffffff823d8c80) at pf_purge+0x28 taskq_thread(ffffffff822c73e8) at taskq_thread+0x11a end trace frame: 0x0, count: 9 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{3}> show panic *cpu3: uvm_fault(0xffffffff824be2f8, 0x17, 0, 2) -> e ddb{3}> ddb{3}> show reg rdi 0x9 rsi 0xf rbp 0xffff800022d59b80 rbx 0xfffffd842e15b7d8 rdx 0xffffffffffffffff rcx 0x10 rax 0xf r8 0xffff7fffffffffff r9 0xffff800022d59d20 r10 0x362267f9c796ed3a r11 0xcadda0efd2fc372f r12 0xffff8000008c3000 r13 0xfffffd8310c597b0 r14 0xfffffd82c6826da0 r15 0xffff8000008c37e0 rip 0xffffffff81398f56 pfsync_q_del+0x96 cs 0x8 rflags 0x10286 __ALIGN_SIZE+0xf286 rsp 0xffff800022d59b50 ss 0x10 pfsync_q_del+0x96: movq %rdx,0x8(%rax) ddb{3}> show locks exclusive rwlock pf_state_lock r = 0 (0xffffffff822e2d58) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x17f #2 pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock pf_lock r = 0 (0xffffffff822e2d28) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x173 #2 pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock pfstates r = 0 (0xffffffff822bf040) #0 witness_lock+0x311 #1 pf_purge_expired_states+0x167 #2 pf_purge+0x28 #3 taskq_thread+0x11a #4 proc_trampoline+0x1c exclusive rwlock netlock r = 0 (0xffffffff822bb6a0) #0 witness_lock+0x311 #1 rw_enter+0x292 #2 pf_purge_expired_states+0x15b #3 pf_purge+0x28 #4 taskq_thread+0x11a #5 proc_trampoline+0x1c exclusive kernel_lock &kernel_lock r = 1 (0xffffffff824bd7e8) #0 witness_lock+0x311 #1 __mp_acquire_count+0x38 #2 mi_switch+0x28b #3 sleep_finish+0xfe #4 rw_enter+0x232 #5 pf_purge_expired_states+0x15b #6 pf_purge+0x28 #7 taskq_thread+0x11a #8 proc_trampoline+0x1c shared rwlock systq r = 0 (0xffffffff822c7458) #0 witness_lock+0x311 #1 taskq_thread+0x10d #2 proc_trampoline+0x1c exclusive mutex &sc->sc_st_mtx r = 0 (0xffff8000008c37f0) #0 witness_lock+0x311 #1 mtx_enter_try+0x95 #2 mtx_enter+0x48 #3 pfsync_q_del+0x34 #4 pfsync_delete_state+0x118 #5 pf_remove_state+0x14b #6 pf_purge_expired_states+0x1b3 #7 pf_purge+0x28 #8 taskq_thread+0x11a #9 proc_trampoline+0x1c ddb{3}> ddb{3}> ps PID TID PPID UID S FLAGS WAIT COMMAND 64605 27158 1 0 3 0x100083 ttyin ksh 1647 514612 1 0 3 0x100098 kqread cron 48222 210533 67433 95 3 0x1100092 kqread smtpd 65914 35320 67433 103 3 0x1100092 kqread smtpd 61746 233409 67433 95 3 0x1100092 kqread smtpd 52667 256224 67433 95 3 0x100092 kqread smtpd 95906 203922 67433 95 3 0x1100092 kqread smtpd 57231 58537 67433 95 3 0x1100092 kqread smtpd 67433 152691 1 0 3 0x100080 kqread smtpd 97649 506323 1 0 3 0x88 kqread sshd 45366 57467 1 0 3 0x100080 kqread ntpd 77512 267891 6824 83 7 0x100012 ntpd 6824 88890 1 83 3 0x1100092 kqread ntpd 31005 238715 43236 74 3 0x1100092 bpf pflogd 43236 250601 1 0 3 0x80 netio pflogd 68108 521674 43371 73 3 0x1100090 kqread syslogd 43371 415292 1 0 3 0x100082 netio syslogd 81117 43543 0 0 3 0x14200 bored smr 86838 148691 0 0 3 0x14200 pgzero zerothread 80313 485970 0 0 3 0x14200 aiodoned aiodoned 32854 188214 0 0 3 0x14200 syncer update 29438 4815 0 0 3 0x14200 cleaner cleaner 37163 482518 0 0 3 0x14200 reaper reaper 99970 193361 0 0 3 0x14200 pgdaemon pagedaemon 16005 423786 0 0 3 0x14200 usbtsk usbtask 8859 281689 0 0 3 0x14200 usbatsk usbatsk 96446 45151 0 0 3 0x40014200 acpi0 acpi0 40490 16284 0 0 7 0x40014200 idle5 47574 460149 0 0 7 0x40014200 idle4 79401 251160 0 0 3 0x40014200 idle3 41337 430655 0 0 3 0x40014200 idle2 74515 339752 0 0 7 0x40014200 idle1 72276 301528 0 0 3 0x14200 bored sensors 70845 493314 0 0 3 0x14200 netlock softnet 52535 320222 0 0 3 0x14200 netlock softnet 84582 75425 0 0 3 0x14200 netlock softnet 31468 426237 0 0 3 0x14200 netlock softnet 2106 515368 0 0 3 0x14200 netlock softnet 77607 431181 0 0 3 0x14200 netlock softnet 58907 91384 0 0 3 0x14200 bored systqmp *94877 250028 0 0 7 0x14200 systq 58401 35323 0 0 7 0x40014200 softclock 37921 329649 0 0 3 0x40014200 idle0 1 321132 0 0 3 0x82 wait init 0 0 -1 0 3 0x10200 scheduler swapper ddb{3}> ddb{3}> ps /o TID PID UID PRFLAGS PFLAGS CPU COMMAND 267891 77512 83 0x100012 0 2 ntpd *250028 94877 0 0x14000 0x200 3K systq 35323 58401 0 0x14000 0x40000200 0 softclock ddb{3}> ddb{3}> trace /t 0t267891 sleep_finish(ffff800022e66b90,1) at sleep_finish+0xfe rw_enter(ffffffff822bb690,1) at rw_enter+0x232 filt_soprocess(fffffd83a5d5e088,0) at filt_soprocess+0x32 kqueue_register(fffffd83b24b2288,ffff800022e66dc0,0,ffff800022dff7a0) at kqueue_register+0x6f4 ppollregister_evts(ffff800022dff7a0,ffff800022e66dc0,1,ffff800022e670b0,0) at ppollregister_evts+0x83 ppollregister(ffff800022dff7a0,ffff800022e670b0,1,ffff800022e67138,ffff800022e6 7134) at ppollregister+0x209 doppoll(ffff800022dff7a0,7f7fffffad78,1,ffff800022e671b8,0,ffff800022e67260) at doppoll+0x132 sys_poll(ffff800022dff7a0,ffff800022e67200,ffff800022e67260) at sys_poll+0x6a syscall(ffff800022e672d0) at syscall+0x35f Xsyscall() at Xsyscall+0x128 end of kernel end trace frame: 0x7f7fffffad40, count: -10 ddb{3}> trace /t 0t250028 fffffd8310c597b0(0,0,0,0,0,0) at 0xfffffd8310c597b0 Bad frame pointer: 0xffff8000008c3000 end trace frame: 0xffff8000008c3000, count: -1 ddb{3}> trace /t 0t35323 sleep_finish(ffff800022d53b50,1) at sleep_finish+0xfe rw_enter(ffffffff822bb690,1) at rw_enter+0x232 igmp_slowtimo() at igmp_slowtimo+0x10 pfslowtimo(ffffffff82335318) at pfslowtimo+0xc7 timeout_run(ffffffff82335318) at timeout_run+0x93 softclock_thread(ffff8000fffff500) at softclock_thread+0x11d end trace frame: 0x0, count: -6 ddb{3}> ddb{3}> mach ddbcpu 0 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffffffff822edff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 __mp_lock(ffffffff824bd5e0) at __mp_lock+0xb3 __mp_acquire_count(ffffffff824bd5e0,1) at __mp_acquire_count+0x38 mi_switch() at mi_switch+0x28b sleep_finish(ffff800022d53b50,1) at sleep_finish+0xfe rw_enter(ffffffff822bb690,1) at rw_enter+0x232 igmp_slowtimo() at igmp_slowtimo+0x10 pfslowtimo(ffffffff82335318) at pfslowtimo+0xc7 timeout_run(ffffffff82335318) at timeout_run+0x93 softclock_thread(ffff8000fffff500) at softclock_thread+0x11d end trace frame: 0x0, count: 3 ddb{0}> mach ddbcpu 1 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022509ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff800022509ff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{1}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022512ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 __mp_lock(ffffffff824bd5e0) at __mp_lock+0xa0 __mp_acquire_count(ffffffff824bd5e0,1) at __mp_acquire_count+0x38 mi_switch() at mi_switch+0x28b sleep_finish(ffff800022e66b90,1) at sleep_finish+0xfe rw_enter(ffffffff822bb690,1) at rw_enter+0x232 filt_soprocess(fffffd83a5d5e088,0) at filt_soprocess+0x32 kqueue_register(fffffd83b24b2288,ffff800022e66dc0,0,ffff800022dff7a0) at kqueue _register+0x6f4 ppollregister_evts(ffff800022dff7a0,ffff800022e66dc0,1,ffff800022e670b0,0) at p pollregister_evts+0x83 ppollregister(ffff800022dff7a0,ffff800022e670b0,1,ffff800022e67138,ffff800022e6 7134) at ppollregister+0x209 doppoll(ffff800022dff7a0,7f7fffffad78,1,ffff800022e671b8,0,ffff800022e67260) at doppoll+0x132 sys_poll(ffff800022dff7a0,ffff800022e67200,ffff800022e67260) at sys_poll+0x6a end trace frame: 0xffff800022e672c0, count: 0 ddb{2}> mach ddbcpu 3 Stopped at pfsync_q_del+0x96: movq %rdx,0x8(%rax) pfsync_q_del(fffffd82c6826da0) at pfsync_q_del+0x96 pfsync_delete_state(fffffd82c6826da0) at pfsync_delete_state+0x118 pf_remove_state(fffffd82c6826da0) at pf_remove_state+0x14b pf_purge_expired_states(b3368) at pf_purge_expired_states+0x1b3 pf_purge(ffffffff823d8c80) at pf_purge+0x28 taskq_thread(ffffffff822c73e8) at taskq_thread+0x11a end trace frame: 0x0, count: 9 ddb{3}> mach ddbcpu 4 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022524ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff800022524ff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{4}> mach ddbcpu 5 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff80002252dff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff80002252dff0) at sched_idle+0x280 end trace frame: 0x0, count: 10 ddb{5}>