Hello all,

I upgraded from 7.6 to 7.7 to 7.8 a few days ago (each step with
sysupgrade).  Once I got to 7.8 the system started to panic.

I've been trying to isolate the problem as best I can.  The original
problem is happening on physical hardware.  I'm able to reproduce the
problem under VMware.  Panic information is below.  Please let me know
if I can provide any additional information.

There are six rdomains: 10, 11, 101, 102, 103, and 104.
Each looks similar to the 101 config shown below (replace 101 with 10,
11, 102, etc.).  The system is stable with static routing across the veb.
The panic only occurs when more than one instance of bgpd is running.

To launch each bgpd instance I'm manually running:
route -T 101 exec bgpd -f /etc/bgpd-101.conf
(replacing '101' with whatever the appropriate rdomain is)

I see in the 7.7 changelog that there were some bgpd improvements, and
some veb/vport improvements in 7.8.  I've looked through the man pages
and nothing jumped out at me as new or different in terms of necessary
config.

The veb seems happy enough with vports in differing rdomains - it
works fine with static routing.

Can anyone shed some light on where I may have gone wrong?

Thanks,
Mark

bgpd-test# cat /etc/hostname.lo101
rdomain 101
inet 101.101.101.101 255.255.255.255 NONE
up

bgpd-test# cat /etc/hostname.vport101
rdomain 101
inet 198.19.1.101 255.255.255.0 NONE
up

bgpd-test# cat /etc/bgpd-101.conf
mynetwork="101.101.101.101/32"
rid="198.19.1.101"
rtable 101
router-id $rid
listen on $rid
AS 65101
network $mynetwork

group "LANs" {
    neighbor 198.19.1.10 {
        remote-as 65010
    }
    neighbor 198.19.1.11 {
        remote-as 65011
    }
    neighbor 198.19.1.102 {
        remote-as 65102
    }
    neighbor 198.19.1.103 {
        remote-as 65103
    }
    neighbor 198.19.1.104 {
        remote-as 65104
    }
}

bgpd-test# cat /etc/hostname.veb0
add vport10
add vport11
add vport101
add vport102
add vport103
add vport104



panic: inet rwlock 0xffff80001efd55d0: enter write deadlock
Stopped at      db_enter+0x14:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
 235033  33212      0           0          0    1  bgpd
*248122  68830      0     0x14000      0x200    0  softnet0
db_enter() at db_enter+0x14
panic(ffffffff826b5b05) at panic+0xd5
rw_do_enter_write(ffff80001efd55d0,0) at rw_do_enter_write+0x265
in_pcbsolock(fffffd83bf539900) at in_pcbsolock+0x83
tcp_input_solocked(ffff8000396998f8,ffff800039699904,6,2,0) at
tcp_input_solocked+0x71e
tcp_input(ffff8000396998f8,ffff800039699904,6,2,0) at tcp_input+0x4b
ip_deliver(ffff8000396998f8,ffff800039699904,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff8000396998f8,ffff800039699904,ffff80003969984c,0,0) at ip_ours+0x6f

ip_input_if(ffff8000396998f8,ffff800039699904,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
end trace frame: 0xffff800039699b30, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}>
ddb{0}> show panic
*cpu0: inet rwlock 0xffff80001efd55d0: enter write deadlock
ddb{0}>
ddb{0}> trace
db_enter() at db_enter+0x14
panic(ffffffff826b5b05) at panic+0xd5
rw_do_enter_write(ffff80001efd55d0,0) at rw_do_enter_write+0x265
in_pcbsolock(fffffd83bf539900) at in_pcbsolock+0x83
tcp_input_solocked(ffff8000396998f8,ffff800039699904,6,2,0) at
tcp_input_solocked+0x71e
tcp_input(ffff8000396998f8,ffff800039699904,6,2,0) at tcp_input+0x4b
ip_deliver(ffff8000396998f8,ffff800039699904,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff8000396998f8,ffff800039699904,ffff80003969984c,0,0) at ip_ours+0x6f

ip_input_if(ffff8000396998f8,ffff800039699904,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000ac2800,fffffd800307fc00,fffffd843e651d58,fffffd843e519560)
at ether_output+0x9d
if_output_tso(ffff800000ac2800,ffff800039699c60,fffffd843e651d58,fffffd843e519560,5dc)
at if_output_tso+0xf6
ip_output(fffffd800307fc00,0,fffffd843e651d40,800,0,fffffd843e651dd8,b5af8a2933b0ab0)
at ip_output+0x7ee
tcp_output(ffff800000dbb148) at tcp_output+0x1a53
tcp_input_solocked(ffff80003969a298,ffff80003969a2a4,6,2,0) at
tcp_input_solocked+0x2854
tcp_input(ffff80003969a298,ffff80003969a2a4,6,2,0) at tcp_input+0x4b
ip_deliver(ffff80003969a298,ffff80003969a2a4,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff80003969a298,ffff80003969a2a4,ffff80003969a1b8,0,0) at ip_ours+0x6f

ip_input_if(ffff80003969a298,ffff80003969a2a4,0,0,ffff800000ac2800,0)
at ip_input_if+0x21f
ipv4_input(ffff800000ac2800,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000ac2800,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000acd000,fffffd800307fc00,fee1bad060ff,ffff800000ce4600,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000acd000,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000acd000,fffffd800307fc00,fffffd83b76ff318,fffffd843e519230)
at ether_output+0x9d
if_output_tso(ffff800000acd000,ffff80003969a600,fffffd83b76ff318,fffffd843e519230,5dc)
at if_output_tso+0xf6
ip_output(fffffd800307fc00,0,fffffd83b76ff300,800,0,fffffd83bf539a08,b5af8a2938a1874)
at ip_output+0x7ee
syn_cache_respond(fffffd83b76ff2a0,fffffd800307fc00,bcd8d4c624c757e,0)
at syn_cache_respond+0x6f7
syn_cache_add(ffff80003969ab90,ffff80003969ab70,fffffd800307fcc4,14,ffff80001efd55c8,fffffd800307fc00,dcd8c2a5db40dac6,fffffd83bf539900,bcd8d4c624c757e,0,ffff80001efd55c8,ffff800000d20cc0)
at syn_cache_add+0x736
tcp_input_solocked(ffff80003969af38,ffff80003969af44,6,2,0) at
tcp_input_solocked+0x156b
tcp_input(ffff80003969af38,ffff80003969af44,6,2,0) at tcp_input+0x4b
ip_deliver(ffff80003969af38,ffff80003969af44,6,2,1,0) at ip_deliver+0xfc
ip_ours(ffff80003969af38,ffff80003969af44,ffff80003969ae8c,0,0) at ip_ours+0x6f

ip_input_if(ffff80003969af38,ffff80003969af44,0,0,ffff800000acd000,0)
at ip_input_if+0x21f
ipv4_input(ffff800000acd000,fffffd800307fc00,0) at ipv4_input+0x3d
ether_input(ffff800000acd000,fffffd800307fc00,0) at ether_input+0x40f
vport_if_enqueue(ffff800000acd000,fffffd800307fc00) at vport_if_enqueue+0x5a
veb_port_input(ffff800000ac2800,fffffd800307fc00,fee1bad5f783,ffff800000ce4100,0)
at veb_port_input+0x39c
vport_enqueue(ffff800000ac2800,fffffd800307fc00) at vport_enqueue+0x109
ether_output(ffff800000ac2800,fffffd800307fc00,ffff800000cf3310,fffffd843e519560)
at ether_output+0x9d
if_output_mq(ffff800000ac2800,fffffd83beb07a40,ffffffff82a67888,ffff800000cf3310,fffffd843e519560)
at if_output_mq+0x8e
arpcache(ffff800000ac2800,fffffd80be0be4c0,fffffd843e519560) at arpcache+0x2ad
in_arpinput(ffff800000ac2800,fffffd80be0be400) at in_arpinput+0x1da
arpintr() at arpintr+0xb7
if_netisr(0) at if_netisr+0xe5
taskq_thread(ffff800000032000) at taskq_thread+0x129
end trace frame: 0x0, count: -50
ddb{0}>
ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  1496   17267  68265     75  3   0x1100092  kqread        bgpd
 69041  381639  68265     75  3   0x1100092  kqread        bgpd
 63388  447180  68265     75  3   0x1100092  kqread        bgpd
 68265  364856      1      0  2           0                bgpd
 50444  202782  31826     75  3   0x1100092  kqread        bgpd
 74335  384030  31826     75  3   0x1100092  kqread        bgpd
 19222  375007  31826     75  3   0x1100092  kqread        bgpd
 31826  507767      1      0  2        0x80                bgpd
  3949  507476  33212     75  3   0x1100092  kqread        bgpd
 93170  348775  33212     75  3   0x1100092  kqread        bgpd
 62062  395225  33212     75  3   0x1100092  kqread        bgpd
 33212  235033      1      0  7           0                bgpd
 44141   41338  73659     75  3   0x1100092  kqread        bgpd
  4588  403342  73659     75  3   0x1100092  kqread        bgpd
 93636  263923  73659     75  3   0x1100092  kqread        bgpd
 73659     394      1      0  2        0x80                bgpd
 86229  469905  14359     75  3   0x1100092  kqread        bgpd
 46941  203026  14359     75  3   0x1100092  kqread        bgpd
 31053  200804  14359     75  3   0x1100092  kqread        bgpd
 14359  383798      1      0  3        0x80  kqread        bgpd
 54152  519551    946     75  3   0x1100092  kqread        bgpd
 97856  110458    946     75  3   0x1100012  netlock       bgpd
 17826  147606    946     75  3   0x1100092  kqread        bgpd
   946   13063      1      0  2           0                bgpd
 68620  446763      1      0  3    0x100083  ttyin         ksh
 35902  331893      1      0  3    0x100098  kqread        cron
 66200  306014      1     99  3   0x1100090  kqread        sndiod
 15877  455910      1    110  3    0x100090  kqread        sndiod
 72330  152984  23765     95  3   0x1100092  kqread        smtpd
 22531  450007  23765    103  3   0x1100092  kqread        smtpd
 70473  280225  23765     95  3   0x1100092  kqread        smtpd
 16322  441746  23765     95  3    0x100092  kqread        smtpd
 38657  174061  23765     95  3   0x1100092  kqread        smtpd
 39758  178601  23765     95  3   0x1100092  kqread        smtpd
 23765  222788      1      0  3    0x100080  kqread        smtpd
 67560  415553      1      0  3        0x88  kqread        sshd
 54868   68589      1      0  3    0x100080  kqread        ntpd
   244   61525  53245     83  3    0x100092  kqread        ntpd
 53245  144965      1     83  3   0x1100092  kqread        ntpd
 83961  291509  95038     74  3   0x1100092  bpf           pflogd
 95038   29063      1      0  3        0x80  sbwait        pflogd
 59951  430874  25278     73  3   0x1100090  kqread        syslogd
 25278  312898      1      0  3    0x100082  sbwait        syslogd
 42588  250068      1      0  3    0x100080  kqread        resolvd
 70593  472015  54573     77  3    0x100092  kqread        dhcpleased
 13724  172652  54573     77  3    0x100092  kqread        dhcpleased
 54573  123098      1      0  3        0x80  kqread        dhcpleased
 83363  446758  20907    115  3    0x100092  kqread        slaacd
 69930   49483  20907    115  3    0x100092  kqread        slaacd
 20907  417758      1      0  3    0x100080  kqread        slaacd
 55017  268958      0      0  3     0x14200  bored         smr
 10058  110293      0      0  3     0x14200  pgzero        zerothread
  2912   62489      0      0  3     0x14200  aiodoned      aiodoned
 17218  258242      0      0  3     0x14200  syncer        update
 82831  366423      0      0  3     0x14200  cleaner       cleaner
 48081  180628      0      0  3     0x14200  reaper        reaper
 16239  260093      0      0  3     0x14200  pgdaemon      pagedaemon
 87003  319209      0      0  3  0x40014200  acpi0         acpi0
 16586  291978      0      0  3  0x40014200                idle1
 30775   23574      0      0  3     0x14200  bored         softnet1
*68830  248122      0      0  7     0x14200                softnet0
 35730  303928      0      0  3     0x14200  bored         systqmp
 72631   29706      0      0  3     0x14200  bored         systq
 70811   20495      0      0  3     0x14200  tmoslp        softclockmp
 50744  300006      0      0  3  0x40014200  tmoslp        softclock
 63512  347235      0      0  3  0x40014200                idle0
     1  344850      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
ddb{0}>

Reply via email to