Hello,

We are running BIRD v1.3.10 and are seeing an issue that is causing BIRD to 
crash. In this setup we're running one "core" router that's using a single 
routing table, an OSPF protocol, and 2 BGP protocols. That core router connects 
to 2 remote routers that are running one OSPF protocol, one BGP protocol, and 
using 2 routing tables (main and one other). The routing tables are connected 
via pipe, and they are both connected to Linux tables via the kernel protocols.

The issue we're seeing is that when a route is withdrawn from the 2nd routing
table on the remote routers (we're only installing the default route), BIRD
crashes with a segfault. The withdrawal is typically triggered by a loss of
connectivity on the network that tears down the OSPF and BGP sessions.

We tried a number of things to prevent the issue from happening. If we prevent 
either the pipe or kernel protocols from exporting routes then the issue does 
not occur. We also tried setting the pipe to opaque mode and that immediately 
causes a segfault on start up. This appears to be a bug with the kernel or pipe 
protocol, but I'm certainly no expert and would love some guidance if there's a 
workaround or error in my config.

Below is the remote router config, and some snippets of the syslog with the
segfaults.

Thanks,

Joel Mulkey


# Returns true when the route's prefix is one of the infrastructure networks:
# exactly 10.99.0.4/32, or 10.2.0.0/30, 172.17.1.0/24 .. 172.17.4.0/24 and
# any of their more-specific subprefixes ("+"). Returns false otherwise.
function infrastructure() {
        if net ~ [ 10.99.0.4/32, 10.2.0.0/30+,
                   172.17.1.0/24+, 172.17.2.0/24+,
                   172.17.3.0/24+, 172.17.4.0/24+ ] then return true;
        return false;
}

# Returns true only when the route is the IPv4 default route (0.0.0.0/0).
function default_only() {
        if net = 0.0.0.0/0 then return true;
        return false;
}

# Export filter used by the ktable2 kernel protocol.
# Routes without a krt_metric attribute are accepted; routes that carry one
# are accepted only when the metric is 1 or lower.
filter table2_out_filter {
        if defined( krt_metric ) then {
                if krt_metric > 1 then reject;
        }
        accept;
}

# Export filter for OSPF: only infrastructure prefixes are advertised.
filter ospf_out {
        if ! infrastructure() then reject;
        accept;
}

# Export filter for BGP.
# Infrastructure prefixes are never exported. Everything else is accepted;
# routes whose krt_metric is defined and greater than 19 get
# bgp_local_pref set to 90 before being accepted.
filter bgp_out {
        if infrastructure() then reject;
        if defined( krt_metric ) then {
                if krt_metric > 19 then bgp_local_pref = 90;
        }
        accept;
}

# Second BIRD routing table; referenced below by the ktable2 kernel protocol
# and by the pipe protocol. Must be declared before those protocols use it.
table table2;

# Router ID; same address as the OSPF stubnet and the BGP source address below.
router id 10.99.0.4;

# Send all log messages to syslog.
log syslog all;
# Global protocol debugging: trace state changes, route updates, filter
# decisions and interface events for every protocol.
debug protocols { states, routes, filters, interfaces };

# Kernel protocol for the default BIRD table (no "table" option is given;
# the log shows it connecting to table "master").
protocol kernel kmain {
        learn;                  # Learn all alien routes from the kernel
        persist;                # Don't remove routes on bird shutdown
        scan time 2;            # Scan kernel routing table every 2 seconds
        import all;             # Default is import all
        export all;             # Default is export none
        preference 190;         # Preference value assigned to this protocol's routes
}

# Kernel protocol that synchronizes BIRD table "table2" with Linux kernel
# routing table 100. Nothing is imported from the kernel; routes are exported
# to it through table2_out_filter.
protocol kernel ktable2 {
        persist;                # Don't remove routes on bird shutdown
        scan time 2;            # Scan kernel routing table every 2 seconds
        import none;            # Default is import all
        export filter table2_out_filter;                # Default is export none
        kernel table 100;       # Kernel table to synchronize with (default: main)
        table table2;   # connect to alternate BIRD table
}

# This pseudo-protocol watches all interface up/down events.
# The instance is unnamed; it shows up as "device1" in the logs.
protocol device {
        scan time 2;            # Scan interfaces every 2 seconds
}

# Pipe protocol connects two routing tables... Beware of loops.
# Connect table2 to main
# The instance is unnamed; it shows up as "pipe1" in the logs, connected to
# tables "master" and "table2".
protocol pipe {
        mode transparent;       # Pipe mode (the alternative tried in this report is "opaque")
        peer table table2;      # Table on the other end of the pipe
        export all;             # Routes flow from the default table into table2 (see the pipe1 log lines)
        import none;            # Nothing is taken from table2 back into the default table
}

# OSPF instance towards the core router: a single backbone area with one
# interface and one stub network.
protocol ospf to_core_ospf {
        debug { events };               # Per-protocol debug setting for this instance
        export filter ospf_out;         # Advertise only infrastructure prefixes
        import all;                     # Accept everything learned via OSPF
        area 0.0.0.0 {
                stubnet 10.99.0.4/32;   # Stub network; same address as the router ID
                interface 10.2.0.2 {
                        # Short timers, presumably for fast failure detection;
                        # units are per the BIRD OSPF documentation - TODO confirm.
                        hello 1;
                        retransmit 2;
                        cost 10;
                        dead 5;
                        wait 4;
                        type broadcast;
                };
        };
}

# BGP session to 10.99.0.1. Local and neighbor AS are both 64600, so this is
# an internal (iBGP) session.
protocol bgp bgp_peer_1 {
        local as 64600;
        export filter bgp_out;          # No infrastructure prefixes; may lower bgp_local_pref
        import all;                     # Accept everything the peer sends
        source address 10.99.0.4;       # Same address as the router ID / OSPF stubnet
        next hop self;
        neighbor 10.99.0.1 as 64600;
        multihop 20;                    # Peer is not assumed to be directly connected
        hold time 12;                   # The "Hold timer expired" errors in the logs relate to this timer
        keepalive time 3;
        startup hold time 10;
        connect retry time 5;
        start delay time 5;
        error wait time 30, 120;
}


Jun 11 21:16:44 tunnel2 bird: bgp_peer_1: Error: Hold timer expired
Jun 11 21:16:44 tunnel2 bird: bgp_peer_1: State changed to flush
Jun 11 21:16:44 tunnel2 bird: bgp_peer_1 > removed [sole] 0.0.0.0/0 unreachable
Jun 11 21:16:44 tunnel2 bird: pipe1 < removed 0.0.0.0/0 unreachable
Jun 11 21:16:44 tunnel2 bird: pipe1 > removed [sole] 0.0.0.0/0 unreachable
Jun 11 21:16:44 tunnel2 bird: ktable2 < removed 0.0.0.0/0 unreachable
Jun 11 21:16:44 tunnel2 bird: kmain < removed 0.0.0.0/0 unreachable
Jun 11 21:16:44 tunnel2 kernel: [  769.220353] bird[4516]: segfault at 18 ip 
00007ff0e8e44e26 sp 00007fffc45719f0 error 4 in bird[7ff0e8e3a000+5e000]

Jun 11 21:27:09 tunnel2 bird: to_core_ospf: Scheduling routing table calculation
Jun 11 21:27:09 tunnel2 bird: to_core_ospf: Neighbor 10.2.0.1 changes state 
from " loading" to "    full".
Jun 11 21:27:09 tunnel2 bird: to_core_ospf: Scheduling router-LSA origination 
for area 0.0.0.0
Jun 11 21:27:09 tunnel2 bird: to_core_ospf: Scheduling network-LSA origination 
for iface eth0
Jun 11 21:27:10 tunnel2 bird: bgp_peer_1: Error: Hold timer expired
Jun 11 21:27:10 tunnel2 bird: bgp_peer_1: State changed to flush
Jun 11 21:27:10 tunnel2 bird: to_core_ospf: Originating network-LSA for iface 
eth0
Jun 11 21:27:10 tunnel2 kernel: [ 1395.215433] bird[5318]: segfault at 18 ip 
00007f6232c2ae26 sp 00007fff05713ea0 error 4 in bird[7f6232c20000+5e000]

mode opaque causing immediate segfault on restart
Jun 11 21:44:05 tunnel1 bird: kmain: Initializing
Jun 11 21:44:05 tunnel1 bird: ktable2: Initializing
Jun 11 21:44:05 tunnel1 bird: device1: Initializing
Jun 11 21:44:05 tunnel1 bird: pipe1: Initializing
Jun 11 21:44:05 tunnel1 bird: bgp_peer_1: Initializing
Jun 11 21:44:05 tunnel1 bird: device1: Starting
Jun 11 21:44:05 tunnel1 bird: device1: Connected to table master
Jun 11 21:44:05 tunnel1 bird: device1: State changed to feed
Jun 11 21:44:05 tunnel1 bird: kmain: Starting
Jun 11 21:44:05 tunnel1 bird: kmain: Connected to table master
Jun 11 21:44:05 tunnel1 bird: kmain: State changed to feed
Jun 11 21:44:05 tunnel1 bird: ktable2: Starting
Jun 11 21:44:05 tunnel1 bird: ktable2: Connected to table table2
Jun 11 21:44:05 tunnel1 bird: ktable2: State changed to feed
Jun 11 21:44:05 tunnel1 bird: pipe1: Starting
Jun 11 21:44:05 tunnel1 bird: pipe1: Connected to table master
Jun 11 21:44:05 tunnel1 bird: pipe1: Connected to table table2
Jun 11 21:44:05 tunnel1 bird: pipe1: State changed to feed
Jun 11 21:44:05 tunnel1 bird: to_core_ospf: Adding area 0.0.0.0
Jun 11 21:44:05 tunnel1 bird: bgp_peer_1: Starting
Jun 11 21:44:05 tunnel1 bird: Started
Jun 11 21:44:05 tunnel1 bird: device1: State changed to up
Jun 11 21:44:05 tunnel1 bird: kmain: State changed to up
Jun 11 21:44:05 tunnel1 bird: ktable2: State changed to up
Jun 11 21:44:05 tunnel1 bird: pipe1: State changed to up
Jun 11 21:44:05 tunnel1 bird: to_core_ospf: Adding interface eth0 (10.1.0.0/30) 
to area 0.0.0.0
Jun 11 21:44:05 tunnel1 bird: kmain > added [best] 172.16.4.0/24 via 172.16.4.2 
on tun4
Jun 11 21:44:05 tunnel1 bird: kmain < rejected by protocol 172.16.4.0/24 via 
172.16.4.2 on tun4
Jun 11 21:44:05 tunnel1 bird: pipe1 < added 172.16.4.0/24 via 172.16.4.2 on tun4
Jun 11 21:44:05 tunnel1 bird: pipe1 > added [best] 172.16.4.0/24 via 172.16.4.2 
on tun4
Jun 11 21:44:05 tunnel1 bird: kvpn_output < added 172.16.4.0/24 via 172.16.4.2 
on tun4
Jun 11 21:44:05 tunnel1 bird: pipe1 < rejected by protocol 172.16.4.0/24 via 
172.16.4.2 on tun4
Jun 11 21:44:05 tunnel1 kernel: [ 2403.848692] bird[5450]: segfault at 18 ip 
00007f0cc629f277 sp 00007fff33e14ed8 error 4 in bird[7f0cc6271000+5e000]

Reply via email to