Hello all,
I upgraded OpenBSD from 7.6 to 7.7 and then to 7.8 a few days ago (each
step with sysupgrade). Once I got to 7.8 the system started panicking.
I've been trying to isolate the problem as best I can. The original
problem is happening on physical hardware, and I'm able to reproduce it
under VMware. Panic information is below. Please let me know
if I can provide any additional information.
There are six rdomains: 10, 11, 101, 102, 103, 104
Each looks similar to the 101 config shown below (replace 101 with 10,
11, 102, etc). System is stable with static routing across the veb.
The panic only occurs when there is more than one instance of bgpd.
To launch each bgpd instance I'm manually running:
route -T 101 exec bgpd -f /etc/bgpd-101.conf
(replacing '101' with whatever the appropriate rdomain is)
I see in the 7.7 changelog that there were some bgpd improvements, and
some veb/vport improvements in 7.8. I've looked through the man pages
and nothing jumped out at me as new or different in terms of necessary
config.
The veb seems happy enough with vports in differing rdomains - it
works fine with static routing.
Can anyone shed some light on where I may have gone wrong?
Thanks,
Mark
bgpd-test# cat /etc/hostname.lo101
rdomain 101
inet 101.101.101.101 255.255.255.255 NONE
up
bgpd-test# cat /etc/hostname.vport101
rdomain 101
inet 198.19.1.101 255.255.255.0 NONE
up
bgpd-test# cat /etc/bgpd-101.conf
mynetwork="101.101.101.101/32"
rid="198.19.1.101"
rtable 101
router-id $rid
listen on $rid
AS 65101
network $mynetwork
group "LANs" {
neighbor 198.19.1.10 {
remote-as 65010
}
neighbor 198.19.1.11 {
remote-as 65011
}
neighbor 198.19.1.102 {
remote-as 65102
}
neighbor 198.19.1.103 {
remote-as 65103
}
neighbor 198.19.1.104 {
remote-as 65104
}
}
bgpd-test# cat /etc/hostname.veb0
add vport10
add vport11
add vport101
add vport102
add vport103
add vport104
panic: inet rwlock 0xffff80001efd55d0: enter write deadlock
Stopped at db_enter+0x14: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
235033 33212 0 0 0 1 bgpd
*248122 68830 0 0x14000 0x200 0 softnet0
db_enter() at db_enter+0x14
panic(ffffffff826b5b05) at panic+0xd5
rw_do_enter_write(ffff80001efd55d0,0) at rw_do_enter_write+0x265
in_pcbsolock(fffffd83bf539900) at in_pcbsolock+0x83
tcp_input_solocked(ffff8000396998f8,ffff800039699904,6,2,0) at
tcp_input_solocked+0x71e
tcp_input(ffff8000396998f8,ffff800039699904,6,2,0) at tcp_input+0x4b
ip_deliver(ffff8000396998f8,ffff800039699904,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff8000396998f8,ffff800039699904,ffff80003969984c,0,0) at ip_ours+0x6f
ip_input_if(ffff8000396998f8,ffff800039699904,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
end trace frame: 0xffff800039699b30, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{0}>
ddb{0}> show panic
*cpu0: inet rwlock 0xffff80001efd55d0: enter write deadlock
ddb{0}>
ddb{0}> trace
db_enter() at db_enter+0x14
panic(ffffffff826b5b05) at panic+0xd5
rw_do_enter_write(ffff80001efd55d0,0) at rw_do_enter_write+0x265
in_pcbsolock(fffffd83bf539900) at in_pcbsolock+0x83
tcp_input_solocked(ffff8000396998f8,ffff800039699904,6,2,0) at
tcp_input_solocked+0x71e
tcp_input(ffff8000396998f8,ffff800039699904,6,2,0) at tcp_input+0x4b
ip_deliver(ffff8000396998f8,ffff800039699904,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff8000396998f8,ffff800039699904,ffff80003969984c,0,0) at ip_ours+0x6f
ip_input_if(ffff8000396998f8,ffff800039699904,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000ac2800,fffffd800307fc00,fffffd843e651d58,fffffd843e519560)
at ether_output+0x9d
if_output_tso(ffff800000ac2800,ffff800039699c60,fffffd843e651d58,fffffd843e519560,5dc)
at if_output_tso+0xf6
ip_output(fffffd800307fc00,0,fffffd843e651d40,800,0,fffffd843e651dd8,b5af8a2933b0ab0)
at ip_output+0x7ee
tcp_output(ffff800000dbb148) at tcp_output+0x1a53
tcp_input_solocked(ffff80003969a298,ffff80003969a2a4,6,2,0) at
tcp_input_solocked+0x2854
tcp_input(ffff80003969a298,ffff80003969a2a4,6,2,0) at tcp_input+0x4b
ip_deliver(ffff80003969a298,ffff80003969a2a4,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff80003969a298,ffff80003969a2a4,ffff80003969a1b8,0,0) at ip_ours+0x6f
ip_input_if(ffff80003969a298,ffff80003969a2a4,0,0,ffff800000ac2800,0)
at ip_input_if+0x21f
ipv4_input(ffff800000ac2800,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000ac2800,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000acd000,fffffd800307fc00,fee1bad060ff,ffff800000ce4600,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000acd000,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000acd000,fffffd800307fc00,fffffd83b76ff318,fffffd843e519230)
at ether_output+0x9d
if_output_tso(ffff800000acd000,ffff80003969a600,fffffd83b76ff318,fffffd843e519230,5dc)
at if_output_tso+0xf6
ip_output(fffffd800307fc00,0,fffffd83b76ff300,800,0,fffffd83bf539a08,b5af8a2938a1874)
at ip_output+0x7ee
syn_cache_respond(fffffd83b76ff2a0,fffffd800307fc00,bcd8d4c624c757e,0)
at syn_cache_respond+0x6f7
syn_cache_add(ffff80003969ab90,ffff80003969ab70,fffffd800307fcc4,14,ffff80001efd55c8,fffffd800307fc00,dcd8c2a5db40dac6,fffffd83bf539900,bcd8d4c624c757e,0,ffff80001efd55c8,ffff800000d20cc0)
at syn_cache_add+0x736
tcp_input_solocked(ffff80003969af38,ffff80003969af44,6,2,0) at
tcp_input_solocked+0x156b
tcp_input(ffff80003969af38,ffff80003969af44,6,2,0) at tcp_input+0x4b
ip_deliver(ffff80003969af38,ffff80003969af44,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff80003969af38,ffff80003969af44,ffff80003969ae8c,0,0) at ip_ours+0x6f
ip_input_if(ffff80003969af38,ffff80003969af44,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000ac2800,fffffd800307fc00,ffff800000cf3310,fffffd843e519560)
at ether_output+0x9d
if_output_mq(ffff800000ac2800,fffffd83beb07a40,ffffffff82a67888,ffff800000cf3310,fffffd843e519560)
at if_output_mq+0x8e
arpcache(ffff800000ac2800,fffffd80be0be4c0,fffffd843e519560) at arpcache+0x2ad
in_arpinput(ffff800000ac2800,fffffd80be0be400) at in_arpinput+0x1da
arpintr() at arpintr+0xb7
if_netisr(0) at if_netisr+0xe5
taskq_thread(ffff800000032000) at taskq_thread+0x129
end trace frame: 0x0, count: -50
ddb{0}>
ddb{0}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
1496 17267 68265 75 3 0x1100092 kqread bgpd
69041 381639 68265 75 3 0x1100092 kqread bgpd
63388 447180 68265 75 3 0x1100092 kqread bgpd
68265 364856 1 0 2 0 bgpd
50444 202782 31826 75 3 0x1100092 kqread bgpd
74335 384030 31826 75 3 0x1100092 kqread bgpd
19222 375007 31826 75 3 0x1100092 kqread bgpd
31826 507767 1 0 2 0x80 bgpd
3949 507476 33212 75 3 0x1100092 kqread bgpd
93170 348775 33212 75 3 0x1100092 kqread bgpd
62062 395225 33212 75 3 0x1100092 kqread bgpd
33212 235033 1 0 7 0 bgpd
44141 41338 73659 75 3 0x1100092 kqread bgpd
4588 403342 73659 75 3 0x1100092 kqread bgpd
93636 263923 73659 75 3 0x1100092 kqread bgpd
73659 394 1 0 2 0x80 bgpd
86229 469905 14359 75 3 0x1100092 kqread bgpd
46941 203026 14359 75 3 0x1100092 kqread bgpd
31053 200804 14359 75 3 0x1100092 kqread bgpd
14359 383798 1 0 3 0x80 kqread bgpd
54152 519551 946 75 3 0x1100092 kqread bgpd
97856 110458 946 75 3 0x1100012 netlock bgpd
17826 147606 946 75 3 0x1100092 kqread bgpd
946 13063 1 0 2 0 bgpd
68620 446763 1 0 3 0x100083 ttyin ksh
35902 331893 1 0 3 0x100098 kqread cron
66200 306014 1 99 3 0x1100090 kqread sndiod
15877 455910 1 110 3 0x100090 kqread sndiod
72330 152984 23765 95 3 0x1100092 kqread smtpd
22531 450007 23765 103 3 0x1100092 kqread smtpd
70473 280225 23765 95 3 0x1100092 kqread smtpd
16322 441746 23765 95 3 0x100092 kqread smtpd
38657 174061 23765 95 3 0x1100092 kqread smtpd
39758 178601 23765 95 3 0x1100092 kqread smtpd
23765 222788 1 0 3 0x100080 kqread smtpd
67560 415553 1 0 3 0x88 kqread sshd
54868 68589 1 0 3 0x100080 kqread ntpd
244 61525 53245 83 3 0x100092 kqread ntpd
53245 144965 1 83 3 0x1100092 kqread ntpd
83961 291509 95038 74 3 0x1100092 bpf pflogd
95038 29063 1 0 3 0x80 sbwait pflogd
59951 430874 25278 73 3 0x1100090 kqread syslogd
25278 312898 1 0 3 0x100082 sbwait syslogd
42588 250068 1 0 3 0x100080 kqread resolvd
70593 472015 54573 77 3 0x100092 kqread dhcpleased
13724 172652 54573 77 3 0x100092 kqread dhcpleased
54573 123098 1 0 3 0x80 kqread dhcpleased
83363 446758 20907 115 3 0x100092 kqread slaacd
69930 49483 20907 115 3 0x100092 kqread slaacd
20907 417758 1 0 3 0x100080 kqread slaacd
55017 268958 0 0 3 0x14200 bored smr
10058 110293 0 0 3 0x14200 pgzero zerothread
2912 62489 0 0 3 0x14200 aiodoned aiodoned
17218 258242 0 0 3 0x14200 syncer update
82831 366423 0 0 3 0x14200 cleaner cleaner
48081 180628 0 0 3 0x14200 reaper reaper
16239 260093 0 0 3 0x14200 pgdaemon pagedaemon
87003 319209 0 0 3 0x40014200 acpi0 acpi0
16586 291978 0 0 3 0x40014200 idle1
30775 23574 0 0 3 0x14200 bored softnet1
*68830 248122 0 0 7 0x14200 softnet0
35730 303928 0 0 3 0x14200 bored systqmp
72631 29706 0 0 3 0x14200 bored systq
70811 20495 0 0 3 0x14200 tmoslp softclockmp
50744 300006 0 0 3 0x40014200 tmoslp softclock
63512 347235 0 0 3 0x40014200 idle0
1 344850 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{0}>