On 26.2.2022. 21:36, Hrvoje Popovski wrote:
> On 25.2.2022. 23:22, Alexander Bluhm wrote:
>> On Fri, Feb 25, 2022 at 10:21:50PM +0100, Hrvoje Popovski wrote:
>>> On 24.2.2022. 22:42, Alexander Bluhm wrote:
>>>> Hi,
>>>>
>>>> Hrvoje reported some crashes with pfsync, IPsec and parallel
>>>> forwarding. Some locks were missing around the tdb flags in pfsync.
>>>
>>> before trying this diff i wanted to see if i could still trigger panic
>>> with sasyncd setup and parallel forwarding diff. and here's panic
>>>
>>> r620-2# uvm_fault(0xffffffff822c8f70, 0xe7, 0, 2) -> e
>>> kernel: page fault trap, code=0
>>> Stopped at ipsp_delete_acquire+0x61: movq %rax,0xe8(%rcx)
>>> TID PID UID PRFLAGS PFLAGS CPU COMMAND
>>> 486149 44310 68 0x1000010 0 3 sasyncd
>>> 330850 23209 0 0x14000 0x200 4 softnet
>>> 48580 70158 0 0x14000 0x200 1 softnet
>>> 4459 11329 0 0x14000 0x200 2 softnet
>>> 70953 79831 0 0x14000 0x200 5 softnet
>>> *303117 65399 0 0x14000 0x40000200 0 softclock
>>> ipsp_delete_acquire(fffffd837c504c30) at ipsp_delete_acquire+0x61
>>> ipsp_delete_acquire_timo(fffffd837c504c30) at ipsp_delete_acquire_timo+0x20
>>> softclock_thread(ffff8000fffff500) at softclock_thread+0x13b
>>> end trace frame: 0x0, count: 12
>>> https://www.openbsd.org/ddb.html describes the minimum info required in bug
>>> reports. Insufficient info makes it difficult to find and fix bugs.
>>> ddb{0}>
>>>
>>>
>>> now i will try all the same with this diff ..
>>
>> I just commited it before I saw your mail. I hope the best.
>>
>
> I'm hitting committed diff for a sometime and by now boxes should panic,
> but they running just fine ..
>
> i will still play with sasyncd setup, maybe something comes up
>
And when I thought everything is fine.. after 2 days of hitting sasyncd
setup I've got this panic .. will stay in ddb if some other information
is needed
r620-2# uvm_fault(0xfufvfm_ffafulftf(f08xf2f3f9f1ff2fef802,3 91 2 e 0,
0 x 17 , 0 ,
2 ) -> e
ke0rxne7e,l: p0ag,e 2 ) - > f a eu
t kterrnape,l: p caogde e = 0 l
Stopped at pfsync_sendout+0x5a5: movq %rax,0(%rcx)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
190670 3268 68 0x1000010 0 5 isakmpd
*304096 4574 0 0x14000 0x200 1K softnet
129724 5880 0 0x14000 0x200 4 softnet
73765 66549 0 0x14000 0x200 3 softnet
358805 73199 0 0x14000 0x200 2 softnet
388277 75259 0 0x14000 0x40000200 0 softclock
pfsync_sendout() at pfsync_sendout+0x5a5
pfsync_update_state(fffffd8425b77030) at pfsync_update_state+0x15b
pf_test(2,3,ffff8000000c2048,ffff800022c71dd8) at pf_test+0xd61
ip_output(fffffd8007117b00,0,ffff800022c71f68,1,0,0,e436e0ec669ea3ac) at
ip_output+0x6b7
ip_forward(fffffd8007117b00,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+0x2da
ip_input_if(ffff800022c720a8,ffff800022c720b4,4,0,ffff800000099048) at
ip_input_if+0x353
ipv4_input(ffff800000099048,fffffd8007117b00) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd8007117b00) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c72198) at if_input_process+0x92
ifiq_process(ffff800000098800) at ifiq_process+0x69
taskq_thread(ffff80000002f200) at taskq_thread+0x100
end trace frame: 0x0, count: 4
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{1}>
ddb{1}> show panic
*cpu0: uvm_fault(0xffffffff823912e0, 0xe7, 0, 2) -> e
cpu1: uvm_fault(0xffffffff823912e0, 0x17, 0, 2) -> e
ddb{1}> trace
pfsync_sendout() at pfsync_sendout+0x5a5
pfsync_update_state(fffffd8425b77030) at pfsync_update_state+0x15b
pf_test(2,3,ffff8000000c2048,ffff800022c71dd8) at pf_test+0xd61
ip_output(fffffd8007117b00,0,ffff800022c71f68,1,0,0,e436e0ec669ea3ac) at
ip_out
put+0x6b7
ip_forward(fffffd8007117b00,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+
0x2da
ip_input_if(ffff800022c720a8,ffff800022c720b4,4,0,ffff800000099048) at
ip_input
_if+0x353
ipv4_input(ffff800000099048,fffffd8007117b00) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd8007117b00) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c72198) at if_input_process+0x92
ifiq_process(ffff800000098800) at ifiq_process+0x69
taskq_thread(ffff80000002f200) at taskq_thread+0x100
end trace frame: 0x0, count: -11
ddb{1}> show reg
rdi 0xfffffd80a3151c00
rsi 0xfffffd800ee1a06c
rbp 0xffff800022c71b70
rbx 0x80
rdx 0x28
rcx 0x17
rax 0xffffffffffffffff
r8 0xffff800000092500
r9 0x97f8651120a146b0
r10 0x984529fed483aed5
r11 0x6b6c7f4869727447
r12 0xfffffd83972ea308
r13 0x1
r14 0xffff800022c71aa0
r15 0xffffffff8227f868 pfsync_qs+0x18
rip 0xffffffff81359835 pfsync_sendout+0x5a5
cs 0x8
rflags 0x10286 __ALIGN_SIZE+0xf286
rsp 0xffff800022c71a80
ss 0x10
pfsync_sendout+0x5a5: movq %rax,0(%rcx)
ddb{1}> show tdb
0xffffffff81359835: c7431800 (unknown address family)->(unknown address
family)
:32 #1166756059 a0758b48
ddb{1}> show tdb /f 0xffffffff81359835
tdb at 0xffffffff81359835
hnext: 0x1024448b49018948
dnext: 0x1894818244c8b49
snext: 0xffffff182444c749
inext: 0xffff102444c749ff
onext: 0x1262484c641ffff
xform: 0x64cb5be8e7894c
refcnt: 1166756059
encalgxform: 0xc0458b48c87d8b48
authalgxform: 0x4d01c58341208b4c
compalgxform: 0xb8558b488775e485
flags:
a0758b48<ALLOCATIONS,DELETED,SOFT_BYTES,SOFT_ALLOCATIONS,PF
S,USEDTUNNEL,PFSYNC,ESN>
seq: 1209395455
exp_allocations: -1924333502
soft_allocations: 563365
cur_allocations: -410432512
exp_bytes: -5364546917621037336
soft_bytes: -1659706978152216072
cur_bytes: 595474916124542967
exp_timeout: 138659656
soft_timeout: -2266001891575756472
established: 3707809967508553735
first_use: 5188146804563479881
soft_first_use: 567609182413933449
exp_first_use: -8410164159382552576
last_used: 4866500037188249733
last_marked: 8660898498832
cryptoid: 5194976444249276423
tdb_spi: c7431800
amxkeylen: 0
emxkeylen: 18432
ivlen: 17289
sproto: 32
wnd: 73
satype: 139
updates: 141
dst: (unknown address family)
src: (unknown address family)
amxkey: 0xc74828438d480000
emxkey: 0x8948000000002843
rpl: 2252407740903491
ids: 0x8885c74960438948
ids_swapped: 8
mtu: 1224736768
mtutimeout: -8266357115894852215
udpencap_port: 17256
tag: 51016
tap: 26691
rdomain: -1991770112
rdomain_post: -1958121405
ddb{1}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
73707 308539 1 0 3 0x100083 ttyin ksh
53580 13385 1 0 3 0x100098 kqread cron
41556 42970 62979 95 3 0x1100092 kqread smtpd
60447 264537 62979 103 3 0x1100092 kqread smtpd
16432 176136 62979 95 3 0x1100092 kqread smtpd
17014 301456 62979 95 3 0x100092 kqread smtpd
79643 296414 62979 95 3 0x1100092 kqread smtpd
52849 206862 62979 95 3 0x1100092 kqread smtpd
62979 130211 1 0 3 0x100080 kqread smtpd
59561 234226 1 0 3 0x88 kqread sshd
34146 338927 687 68 2 0x1000010 sasyncd
687 371001 1 0 3 0x80 kqread sasyncd
3268 190670 50986 68 7 0x1000010 isakmpd
50986 286116 1 0 3 0x80 netio isakmpd
89210 296173 52808 74 3 0x1100092 bpf pflogd
52808 106776 1 0 3 0x80 netio pflogd
75230 386787 56071 73 2 0x1100090 syslogd
56071 21039 1 0 3 0x100082 netio syslogd
83437 361989 0 0 3 0x14200 bored smr
10255 519623 0 0 3 0x14200 pgzero zerothread
53441 481932 0 0 3 0x14200 aiodoned aiodoned
93896 491626 0 0 3 0x14200 syncer update
13135 207378 0 0 3 0x14200 cleaner cleaner
23641 285338 0 0 3 0x14200 reaper reaper
64695 175830 0 0 3 0x14200 pgdaemon pagedaemon
4153 218785 0 0 3 0x14200 usbtsk usbtask
77187 463714 0 0 3 0x14200 usbatsk usbatsk
56210 231618 0 0 3 0x40014200 acpi0 acpi0
73120 495721 0 0 3 0x40014200 idle5
41435 455115 0 0 3 0x40014200 idle4
51120 70524 0 0 3 0x40014200 idle3
62794 507710 0 0 3 0x40014200 idle2
35019 218377 0 0 3 0x40014200 idle1
25810 187904 0 0 3 0x14200 bored sensors
* 4574 304096 0 0 7 0x14200 softnet
5880 129724 0 0 7 0x14200 softnet
66549 73765 0 0 7 0x14200 softnet
73199 358805 0 0 7 0x14200 softnet
56945 207055 0 0 3 0x14200 bored systqmp
89963 261638 0 0 3 0x14200 bored systq
75259 388277 0 0 7 0x40014200 softclock
60433 274727 0 0 3 0x40014200 idle0
1 4220 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{1}>
ddb{1}> trace /t 0t304096
fffffd8007117b00(da7dca64a8c00164,a8c0221e32400000,6a10240200450008,88e9c8bbf4e
c9fdd,8e399bc384e79010,5606fef28f5f0000) at 0xfffffd8007117b00
end of kernel
end trace frame: 0xe2908ae9c8bbf4ec, count: -1
ddb{1}> mach ddbcpu 0
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffffffff8227cff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
x86_bus_space_io_read_1(2f8,5) at x86_bus_space_io_read_1+0x15
comcnputc(801,20) at comcnputc+0x12f
cnputc(20) at cnputc+0x37
db_putchar(66) at db_putchar+0x25a
kprintf() at kprintf+0x133b
db_printf(ffffffff81f014de) at db_printf+0x69
db_ktrap(6,0,ffff800022c4e960) at db_ktrap+0x196
kerntrap(ffff800022c4e960) at kerntrap+0xa2
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
ipsp_delete_acquire(fffffd839d6c6d20) at ipsp_delete_acquire+0x61
ipsp_delete_acquire_timo(fffffd839d6c6d20) at ipsp_delete_acquire_timo+0x20
end trace frame: 0xffff800022c4eab0, count: 0
ddb{0}>
ddb{0}> mach ddbcpu 1
Stopped at pfsync_sendout+0x5a5: movq %rax,0(%rcx)
pfsync_sendout() at pfsync_sendout+0x5a5
pfsync_update_state(fffffd8425b77030) at pfsync_update_state+0x15b
pf_test(2,3,ffff8000000c2048,ffff800022c71dd8) at pf_test+0xd61
ip_output(fffffd8007117b00,0,ffff800022c71f68,1,0,0,e436e0ec669ea3ac) at
ip_out
put+0x6b7
ip_forward(fffffd8007117b00,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+
0x2da
ip_input_if(ffff800022c720a8,ffff800022c720b4,4,0,ffff800000099048) at
ip_input
_if+0x353
ipv4_input(ffff800000099048,fffffd8007117b00) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd8007117b00) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c72198) at if_input_process+0x92
ifiq_process(ffff800000098800) at ifiq_process+0x69
taskq_thread(ffff80000002f200) at taskq_thread+0x100
end trace frame: 0x0, count: 4
ddb{1}>
ddb{1}> mach ddbcpu 2
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff800022412ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
_kernel_lock() at _kernel_lock+0xa9
ether_resolve(ffff8000000c2048,fffffd80a3161200,ffff80000013d830,fffffd842d596c
b8,ffff800022c60678) at ether_resolve+0x1ad
ether_output(ffff8000000c2048,fffffd80a3161200,ffff80000013d830,fffffd842d596cb
8) at ether_output+0x2c
ip_output(fffffd80a3161200,0,ffff800022c608f8,1,0,0,e436e0ec669ea3ac) at
ip_out
put+0x8ee
ip_forward(fffffd80a3161200,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+
0x2da
ip_input_if(ffff800022c60a38,ffff800022c60a44,4,0,ffff800000099048) at
ip_input
_if+0x353
ipv4_input(ffff800000099048,fffffd80a3161200) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd80a3161200) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c60b28) at if_input_process+0x92
ifiq_process(ffff800000098900) at ifiq_process+0x69
taskq_thread(ffff80000002f080) at taskq_thread+0x100
end trace frame: 0x0, count: 1
ddb{2}>
ddb{2}> mach ddbcpu 3
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff80002241bff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
_kernel_lock() at _kernel_lock+0xa0
ether_resolve(ffff8000000c2048,fffffd80a405d600,ffff80000013d830,fffffd842d596c
b8,ffff800022c65c48) at ether_resolve+0x1ad
ether_output(ffff8000000c2048,fffffd80a405d600,ffff80000013d830,fffffd842d596cb
8) at ether_output+0x2c
ip_output(fffffd80a405d600,0,ffff800022c65ec8,1,0,0,e436e0ec669ea3ac) at
ip_out
put+0x8ee
ip_forward(fffffd80a405d600,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+
0x2da
ip_input_if(ffff800022c66008,ffff800022c66014,4,0,ffff800000099048) at
ip_input
_if+0x353
ipv4_input(ffff800000099048,fffffd80a405d600) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd80a405d600) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c660f8) at if_input_process+0x92
ifiq_process(ffff800000098a00) at ifiq_process+0x69
taskq_thread(ffff80000002f100) at taskq_thread+0x100
end trace frame: 0x0, count: 1
ddb{3}>
ddb{3}> mach ddbcpu 4
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff800022424ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
_kernel_lock() at _kernel_lock+0xb0
ether_resolve(ffff8000000c2048,fffffd80a38a5c00,ffff80000013d830,fffffd842d596c
b8,ffff800022c6b9d8) at ether_resolve+0x1ad
ether_output(ffff8000000c2048,fffffd80a38a5c00,ffff80000013d830,fffffd842d596cb
8) at ether_output+0x2c
ip_output(fffffd80a38a5c00,0,ffff800022c6bc58,1,0,0,e436e0ec669ea3ac) at
ip_out
put+0x8ee
ip_forward(fffffd80a38a5c00,ffff800000099048,fffffd842d596cb8,0) at
ip_forward+
0x2da
ip_input_if(ffff800022c6bd98,ffff800022c6bda4,4,0,ffff800000099048) at
ip_input
_if+0x353
ipv4_input(ffff800000099048,fffffd80a38a5c00) at ipv4_input+0x39
ether_input(ffff800000099048,fffffd80a38a5c00) at ether_input+0x3ad
if_input_process(ffff800000099048,ffff800022c6be88) at if_input_process+0x92
ifiq_process(ffff800000098b00) at ifiq_process+0x69
taskq_thread(ffff80000002f180) at taskq_thread+0x100
end trace frame: 0x0, count: 1
ddb{4}>
ddb{4}> mach ddbcpu 5
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff80002242dff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
__mp_acquire_count(ffffffff82383a08,2) at __mp_acquire_count+0x89
msleep(fffffd83b08c8678,fffffd83b08c8678,318,ffffffff81f72f9e,179) at
msleep+0x
12f
kqueue_sleep(fffffd83b08c8678,ffff800022d07190) at kqueue_sleep+0xbe
kqueue_scan(ffff800022d06e20,8,ffff800022d06f20,ffff800022d07190,ffff800022ce87
e0,ffff800022d070fc) at kqueue_scan+0xfc
dopselect(ffff800022ce87e0,19,23a0172eb00,23a85212420,0,ffff800022d07190,90ad59
42bdf1a75d,ffff800022ce87e0) at dopselect+0x415
sys_pselect(ffff800022ce87e0,ffff800022d07200,ffff800022d07250) at
sys_pselect+
0xdb
syscall(ffff800022d072c0) at syscall+0x374
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x7f7ffffd0900, count: 4
ddb{5}>