On 4.12.2021. 10:41, Hrvoje Popovski wrote: > On 3.12.2021. 20:35, Alexander Bluhm wrote: >> Hi, >> >> After implementing MP safe refcounting for IPsec TDB, I wonder how >> stable my diff for forwarding on multiple CPU is. >> >> Note that IPsec still has the workaround to disable multiple queues. >> We do not have a mutex that protects the TDB fields yet. We have >> only fixed the memory management. >> >> My goal is to get real MP pressure on the lower part of the IP >> network stack. Only this will show remaining bugs. > Hi, > > with only plain forwarding it seems that everything works just fine, but > with ipsec i'm getting this panic. i will leave ddb console active if > something else will be needed
Now with WITNESS r620-2# uvm_fault(0xffffffff8226ee78, 0x137, 0, 2) -> e kernel: page fault trap, code=0 Stopped at ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx) TID PID UID PRFLAGS PFLAGS CPU COMMAND 258332 43705 0 0x14000 0x200 4 softnet *440260 99913 0 0x14000 0x200 3K softnet 63340 24364 0 0x14000 0x200 2 softnet ipsp_spd_lookup(fffffd80a46ed700,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a46ed700) at ipsp_spd_lookup+0xa2f ip_output_ipsec_lookup(fffffd80a46ed700,14,0,ffff800022c71248,0) at ip_output_ipsec_lookup+0x4c ip_output(fffffd80a46ed700,0,ffff800022c71408,1,0,0,6db6075421f27eb8) at ip_output+0x39d ip_forward(fffffd80a46ed700,ffff800000087048,fffffd83b4374cb0,0) at ip_forward+0x26a ip_input_if(ffff800022c71548,ffff800022c71554,4,0,ffff800000087048) at ip_input_if+0x353 ipv4_input(ffff800000087048,fffffd80a46ed700) at ipv4_input+0x39 ether_input(ffff800000087048,fffffd80a46ed700) at ether_input+0x3aa if_input_process(ffff800000087048,ffff800022c71638) at if_input_process+0x92 ifiq_process(ffff800000086e00) at ifiq_process+0x69 taskq_thread(ffff800000030200) at taskq_thread+0x9f end trace frame: 0x0, count: 5 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{3}> ddb{3}> show reg rdi 0 rsi 0xffff80000145d018 rbp 0xffff800022c71150 rbx 0 rdx 0x1 rcx 0x137 rax 0xffffffffffffffff r8 0xfffffd839a222d30 r9 0xffff800001453904 r10 0xffff80000142f240 r11 0xebc2ae563992db8e r12 0xffff800022c71178 r13 0 r14 0x1 r15 0 rip 0xffffffff811f56cf ipsp_spd_lookup+0xa2f cs 0x8 rflags 0x10213 __ALIGN_SIZE+0xf213 rsp 0xffff800022c71040 ss 0x10 ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx) ddb{3}> ddb{3}> show locks exclusive kernel_lock &kernel_lock r = 0 (0xffffffff822b3ce8) #0 witness_lock+0x333 #1 kpageflttrap+0x172 #2 kerntrap+0x91 #3 alltraps_kern_meltdown+0x7b #4 ipsp_spd_lookup+0xa2f #5 ip_output_ipsec_lookup+0x4c #6 ip_output+0x39d #7 ip_forward+0x26a #8 ip_input_if+0x353 #9 ipv4_input+0x39 #10 ether_input+0x3aa #11 if_input_process+0x92 #12 ifiq_process+0x69 #13 taskq_thread+0x9f #14 proc_trampoline+0x1c shared rwlock netlock r = 0 (0xffffffff8217f598) #0 witness_lock+0x333 #1 rw_enter+0x27f #2 if_input_process+0x75 #3 ifiq_process+0x69 #4 taskq_thread+0x9f #5 proc_trampoline+0x1c shared rwlock softnet r = 0 (0xffff800000030270) #0 witness_lock+0x333 #1 taskq_thread+0x92 #2 proc_trampoline+0x1c ddb{3}> ddb{3}> show all locks Process 43705 (softnet) thread 0xffff8000ffffe540 (258332) shared rwlock netlock r = 0 (0xffffffff8217f598) #0 witness_lock+0x333 #1 rw_enter+0x27f #2 if_input_process+0x75 #3 ifiq_process+0x69 #4 taskq_thread+0x9f #5 proc_trampoline+0x1c shared rwlock softnet r = 0 (0xffff800000030370) #0 witness_lock+0x333 #1 taskq_thread+0x92 #2 proc_trampoline+0x1c Process 99913 (softnet) thread 0xffff8000ffffe000 (440260) exclusive kernel_lock &kernel_lock r = 0 (0xffffffff822b3ce8) #0 witness_lock+0x333 #1 kpageflttrap+0x172 #2 kerntrap+0x91 #3 alltraps_kern_meltdown+0x7b #4 ipsp_spd_lookup+0xa2f #5 ip_output_ipsec_lookup+0x4c #6 ip_output+0x39d #7 ip_forward+0x26a #8 ip_input_if+0x353 #9 ipv4_input+0x39 #10 ether_input+0x3aa #11 if_input_process+0x92 #12 ifiq_process+0x69 #13 taskq_thread+0x9f #14 proc_trampoline+0x1c shared rwlock netlock r = 0 (0xffffffff8217f598) #0 witness_lock+0x333 #1 rw_enter+0x27f #2 if_input_process+0x75 #3 ifiq_process+0x69 #4 taskq_thread+0x9f #5 proc_trampoline+0x1c shared rwlock softnet r = 0 (0xffff800000030270) #0 witness_lock+0x333 #1 taskq_thread+0x92 #2 proc_trampoline+0x1c Process 39040 (softnet) thread 0xffff8000ffffe2a0 (358256) shared rwlock softnet r = 0 (0xffff800000030170) #0 witness_lock+0x333 #1 taskq_thread+0x92 #2 proc_trampoline+0x1c Process 24364 (softnet) thread 0xffff8000ffffe7e0 (63340) shared rwlock netlock r = 0 (0xffffffff8217f598) #0 witness_lock+0x333 #1 rw_enter+0x27f #2 if_input_process+0x75 #3 ifiq_process+0x69 #4 taskq_thread+0x9f #5 proc_trampoline+0x1c shared rwlock softnet r = 0 (0xffff800000030070) #0 witness_lock+0x333 #1 taskq_thread+0x92 #2 proc_trampoline+0x1c ddb{3}> ddb{3}> show tdb 0xffffffff811f56cf: 000041bd (unknown address family)->(unknown address family) :15 #-1958084609 74f68445 ddb{3}> show tdb /f 0xffffffff811f56cf tdb at 0xffffffff811f56cf hnext: 0x130808b49018948 dnext: 0x138888b490000 snext: 0x3880c74901894800 inext: 0x49ffffffff000001 onext: 0xffff0000013080c7 xform: 0x8b4c002691dbe800 refcnt: -1958084609 encalgxform: 0x11080c7498845 authalgxform: 0x80c7490000000000 compalgxform: 0xf8 flags: 74f68445<UNIQUE,BYTES,DELETED,SOFT_FIRSTUSE,UDPENCAP,PFSYNC ,ESN> seq: -34635 exp_allocations: -2133708289 soft_allocations: 1207998333 cur_allocations: 949306639 exp_bytes: -53887557573804033 soft_bytes: -7572652700634382337 cur_bytes: -8986685076615987194 exp_timeout: 1274901163972733 soft_timeout: -1044682726943537152 established: -23376416473088 first_use: 1207830954720511 soft_first_use: -2045619644365824 exp_first_use: -956873529326089985 last_used: -528463706791608320 last_marked: -5151950549254864897 cryptoid: 434571561984018687 tdb_spi: 000041bd amxkeylen: 13 emxkeylen: 0 ivlen: 49285 sproto: 15 wnd: 133 satype: 76 updates: 248 dst: (unknown address family) src: (unknown address family) amxkey: 0x110b88b49ffffff emxkey: 0x4c00268f92e80000 rpl: 76702787085813131 ids: 0xf2b8809845 ids_swapped: -578482432 mtu: 1224736758 mtutimeout: 264675222111627 udpencap_port: 0 tag: 4073 tap: 1006632963 rdomain: 1012364293 rdomain_post: -1048244478 ddb{3}> ddb{3}> ps PID TID PPID UID S FLAGS WAIT COMMAND 48889 516122 49524 68 3 0x10 netlock isakmpd 49524 136902 1 0 3 0x80 netio isakmpd 7860 162241 1 0 3 0x100083 ttyin ksh 29826 116692 1 0 3 0x100098 poll cron 60841 164826 66234 95 3 0x100092 kqread smtpd 55097 2406 66234 103 3 0x100092 kqread smtpd 73287 413363 66234 95 3 0x100092 kqread smtpd 72259 146575 66234 95 3 0x100092 kqread smtpd 58288 270273 66234 95 3 0x100092 kqread smtpd 12210 125515 66234 95 3 0x100092 kqread smtpd 66234 365791 1 0 3 0x100080 kqread smtpd 27746 82886 1 0 3 0x88 poll sshd 65737 129938 1 0 3 0x100080 poll ntpd 46028 11125 33739 83 3 0x100092 poll ntpd 33739 250917 1 83 3 0x100092 poll ntpd 78022 412651 14622 73 3 0x100090 kqread syslogd 14622 207875 1 0 3 0x100082 netio syslogd 95123 271769 0 0 3 0x14200 bored smr 44020 301722 0 0 3 0x14200 pgzero zerothread 95530 514370 0 0 3 0x14200 aiodoned aiodoned 26250 378519 0 0 3 0x14200 syncer update 69845 146993 0 0 3 0x14200 cleaner cleaner 19457 388805 0 0 3 0x14200 reaper reaper 54770 476596 0 0 3 0x14200 pgdaemon pagedaemon 1279 29813 0 0 3 0x14200 usbtsk usbtask 54131 121668 0 0 3 0x14200 usbatsk usbatsk 3557 323237 0 0 3 0x40014200 acpi0 acpi0 2093 393367 0 0 7 0x40014200 idle5 83675 132444 0 0 3 0x40014200 idle4 18222 312526 0 0 3 0x40014200 idle3 733 72421 0 0 3 0x40014200 idle2 20085 265763 0 0 7 0x40014200 idle1 22638 502832 0 0 3 0x14200 bored sensors 43705 258332 0 0 7 0x14200 softnet *99913 440260 0 0 7 0x14200 softnet 39040 358256 0 0 3 0x14200 netlock softnet 24364 63340 0 0 7 0x14200 softnet 7337 83749 0 0 3 0x14200 bored systqmp 66311 283484 0 0 3 0x14200 bored systq 41321 291222 0 0 3 0x40014200 bored softclock 74282 319709 0 0 7 0x40014200 idle0 1 12703 0 0 3 0x82 wait init 0 0 -1 0 3 0x10200 scheduler swapper ddb{3}> ddb{3}> mach ddbcpu 0 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffffffff8219eff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 __mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3 softintr_dispatch(0) at softintr_dispatch+0x4a Xsoftclock() at Xsoftclock+0x1f acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffffffff8219eff0) at sched_idle+0x27e end trace frame: 0x0, count: 7 ddb{0}> ddb{0}> mach ddbcpu 1 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022409ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff800022409ff0) at sched_idle+0x27e end trace frame: 0x0, count: 10 ddb{1}> ddb{1}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022412ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 __mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3 kpageflttrap(ffff800022c65210,137) at kpageflttrap+0x172 kerntrap(ffff800022c65210) at kerntrap+0x91 alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b ipsp_spd_lookup(fffffd80a3a81a00,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a3a81a00) at ipsp_spd_lookup+0xa2f ip_output_ipsec_lookup(fffffd80a3a81a00,14,0,ffff800022c654c8,0) at ip_output_ipsec_lookup+0x4c ip_output(fffffd80a3a81a00,0,ffff800022c65688,1,0,0,6db6075421f27eb8) at ip_output+0x39d ip_forward(fffffd80a3a81a00,ffff800000087048,fffffd83b4374cb0,0) at ip_forward+0x26a ip_input_if(ffff800022c657c8,ffff800022c657d4,4,0,ffff800000087048) at ip_input_if+0x353 ipv4_input(ffff800000087048,fffffd80a3a81a00) at ipv4_input+0x39 ether_input(ffff800000087048,fffffd80a3a81a00) at ether_input+0x3aa end trace frame: 0xffff800022c658a0, count: 0 ddb{2}> ddb{2}> mach ddbcpu 3 Stopped at ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx) ipsp_spd_lookup(fffffd80a46ed700,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a46ed700) at ipsp_spd_lookup+0xa2f ip_output_ipsec_lookup(fffffd80a46ed700,14,0,ffff800022c71248,0) at ip_output_ipsec_lookup+0x4c ip_output(fffffd80a46ed700,0,ffff800022c71408,1,0,0,6db6075421f27eb8) at ip_output+0x39d ip_forward(fffffd80a46ed700,ffff800000087048,fffffd83b4374cb0,0) at ip_forward+0x26a ip_input_if(ffff800022c71548,ffff800022c71554,4,0,ffff800000087048) at ip_input_if+0x353 ipv4_input(ffff800000087048,fffffd80a46ed700) at ipv4_input+0x39 ether_input(ffff800000087048,fffffd80a46ed700) at ether_input+0x3aa if_input_process(ffff800000087048,ffff800022c71638) at if_input_process+0x92 ifiq_process(ffff800000086e00) at ifiq_process+0x69 taskq_thread(ffff800000030200) at taskq_thread+0x9f end trace frame: 0x0, count: 5 ddb{3}> ddb{3}> mach ddbcpu 4 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff800022424ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 spllower(7) at spllower+0x50 pool_cache_get(ffffffff822a3dd8) at pool_cache_get+0x1f0 pool_get(ffffffff822a3dd8,2) at pool_get+0x5e m_clget(0,2,802) at m_clget+0x1c5 ixgbe_get_buf(ffff80000008a1d0,9) at ixgbe_get_buf+0xa3 ixgbe_rxfill(ffff80000008a1d0) at ixgbe_rxfill+0xae ixgbe_queue_intr(ffff800000085880) at ixgbe_queue_intr+0x4f intr_handler(ffff800022c76a60,ffff80000007d480) at intr_handler+0x6e Xintr_ioapic_edge1_untramp() at Xintr_ioapic_edge1_untramp+0x18f __mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3 esp_output(fffffd80a35edb00,ffff80000145d018,14,9) at esp_output+0x82d end trace frame: 0xffff800022c76c60, count: 0 ddb{4}> ddb{4}> mach ddbcpu 5 Stopped at x86_ipi_db+0x12: leave x86_ipi_db(ffff80002242dff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x281 sched_idle(ffff80002242dff0) at sched_idle+0x27e end trace frame: 0x0, count: 10 ddb{5}>