Hi.
Yes, it is always reproducible. I'll try to enable debug logging and
reproduce it in the next few days.
The config file is attached (networks are replaced with placeholders;
comments and legacy unused filters/functions have been trimmed).
It seems that uClibc-ng has a slow memory allocator, and that this
triggers a hidden BIRD bug.
24.09.2021 19:53, Maria Matějka пишет:
Hello!
Do you have any log? Are you able to replicate such a behavior
consistently? If so, could you please share an exact configuration
with us to put it into our testbed?
You can also enable "debug protocols all;" in your conf file. This
produces a s***load of logs, yet it should yield enough clues to
isolate the problem and find a suitable solution.
Thank you for your report!
Maria
On September 24, 2021 3:13:45 PM UTC, Andrew wrote:
Hi all.
I have BIRD 2.0.8 on one of my border routers; it runs with kernel 5.10.26
and uClibc-ng 1.0.38. It acts as an RR and receives a full view (FV) from
the uplink plus a FV from the second border router (also an RR), and it has
2 routing tables (one receives the BGP routes, which then sink into the
main table).
When the second border's BGP link fails, BIRD starts to rebuild the routing
table and behaves quite strangely (very slow birdc responses, etc.), and
after ~5 minutes OSPF loses its neighbors and falls into the 'Alone' state.
It can stay in that state for hours, and it recovers only after a BIRD
restart (I didn't try a protocol restart - usually I don't wait for the
route table recalculation to finish).
When table recalculation is in progress, perf top shows that 40+% CPU
time is used by malloc routine.
Are there fixes in trunk for this behavior? If not, what extra info is
needed for debugging?
-- Sent from my Android device with K-9 Mail. Please excuse my brevity.
# Global router identity. The address below is a redacted placeholder
# ("10.x.x.x") and must be replaced with a real IPv4 address before use.
router id 10.x.x.x;
# Symbolic constant for the local AS number, referenced by the filters
# below (rt_client, export_BGP_uplink_prepend). "x" is a redacted placeholder.
define my_as = x;
# Returns true when the current route's prefix is a "martian", i.e. one
# that should never be accepted from or announced to BGP peers.
# NOTE(review): 127.0.0.0/8+ (loopback) is not listed here - confirm
# whether that omission is intentional.
function net_martian()
{
  return net ~ [
    10.0.0.0/8+,          # private space and everything inside it
    172.16.0.0/12+,       # private space and everything inside it
    192.168.0.0/16+,      # private space and everything inside it
    169.254.0.0/16+,      # link-local
    224.0.0.0/4+,         # multicast
    240.0.0.0/4+,         # reserved
    0.0.0.0/32-,          # every prefix that contains 0.0.0.0/32
    0.0.0.0/0{0,7},       # anything shorter than /8
    0.0.0.0/0{25,32}      # anything longer than /24
  ];
}
# Returns true when the current route's prefix lies inside one of the
# listed private ranges or inside one of our own (redacted) networks.
function net_internal()
{
  return net ~ [
    x.x.x.0/22+,
    y.y.y.0/24+,
    10.0.0.0/8+,
    172.16.0.0/12+,
    192.168.0.0/16+
  ];
}
# Returns true when the current route's prefix is one of our own
# (redacted) networks or a more specific prefix inside them.
function net_local()
{
  return net ~ [
    x.x.x.0/22+,
    y.y.y.0/24+
  ];
}
# Returns true when the current route's prefix falls inside one of the
# two (redacted) /31 ranges listed below.
# NOTE(review): presumably these are the peering link networks - confirm.
function net_peers_world()
{
  return net ~ [
    a.a.a.a/31+,
    b.b.b.b/31+
  ];
}
# Checks whether the current route is a client route.
#   asn_t - the client's AS number
#   nets  - prefix set the client is allowed to announce
# Returns true only when all of the following hold:
#   * the prefix is in `nets`;
#   * the AS path either starts with asn_t, or has the form
#     "my_as asn_t ...";
#   * the last AS in the path is asn_t.
function rt_client(int asn_t; prefix set nets)
{
  # Prefix must be one of the client's networks.
  if ( net !~ nets ) then return false;

  # Path must start at the client, either directly or via our own AS.
  if ( !( bgp_path.first = asn_t || bgp_path ~ [= my_as asn_t * =] ) ) then return false;

  # The last AS in the path must be the client as well.
  return ( bgp_path.last = asn_t );
}
# Returns true when the current route passes rt_client() for the
# (redacted) client AS "y" and its prefix set.
function net_clients()
{
  return rt_client(y, [ c.c.c.0/24 ]);
}
# Decides whether the current route is eligible for BGP export.
# A route qualifies when it
#   * comes from protocol "static_bgp_ua" or "static_bgp_w", or was
#     learned via BGP;
#   * is not a martian prefix; and
#   * has an AS path shorter than 64.
function rt_export()
{
  # Origin check: only the two static BGP protocols or BGP-learned routes.
  if ( !( proto = "static_bgp_ua" || proto = "static_bgp_w" || source = RTS_BGP ) ) then return false;

  # Never export martians.
  if ( net_martian() ) then return false;

  # Finally, cap the AS path length.
  return ( bgp_path.len < 64 );
}
# Returns true for routes that may be exported into OSPF: directly
# connected (device) routes and routes from the OSPF static protocol.
#
# The original check compared against "ospf_static" only, while the static
# protocol visible in this configuration is named "static_ospf", so the
# static half of the condition could never match it. Both spellings are
# accepted here so that a protocol named "ospf_static" defined elsewhere
# (if any) keeps working.
function rt_export_OSPF() {
  # connected & static
  if ( source = RTS_DEVICE || proto = "ospf_static" || proto = "static_ospf" ) then {
    return true;
  }
  return false;
}
# Import filter for BGP sessions: rejects martian prefixes and routes
# whose AS path is longer than 45; everything else is accepted.
# NOTE(review): an earlier comment here mentioned adding a prepend for the
# datagroup AS, but this filter performs no prepending.
filter import_BGP {
  if ( net_martian() ) then reject;
  if ( bgp_path.len > 45 ) then reject;
  accept;
}
# Import filter variant that rejects martians and over-long AS paths
# (> 45), but lets prefixes matching net_peers_world() through even when
# net_martian() would otherwise flag them.
filter import_BGP_local {
  # Martians are dropped unless they are one of the net_peers_world() ranges.
  if ( net_martian() && !net_peers_world() ) then reject;

  # Drop routes with an excessively long AS path.
  if ( bgp_path.len > 45 ) then reject;

  accept;
}
# Export filter: announces every route that passes rt_export(), plus any
# route whose prefix matches net_peers_world(). Everything else is rejected.
filter export_BGP_local_w {
  # Regular exportable routes (static BGP protocols and BGP-learned).
  if ( rt_export() ) then accept;

  # Prefixes in the net_peers_world() ranges are announced as well.
  if ( net_peers_world() ) then accept;

  reject;
}
# Export filter towards an uplink: announces only our own networks
# (net_local) and client routes (net_clients) that also pass rt_export(),
# with my_as prepended twice to the AS path. Everything else is rejected.
filter export_BGP_uplink_prepend {
  # Must be exportable at all.
  if ( !rt_export() ) then reject;

  # Must be either one of our networks or a client route.
  if ( !( net_local() || net_clients() ) ) then reject;

  # Lengthen the path by two copies of our own AS.
  bgp_path.prepend(my_as);
  bgp_path.prepend(my_as);
  accept;
}
# Export filter for OSPF: announces the routes selected by
# rt_export_OSPF() and the default route; rejects everything else.
filter export_OSPF_world {
  # Routes selected by rt_export_OSPF().
  if ( rt_export_OSPF() ) then accept;

  # The default route is always announced.
  if ( net = 0.0.0.0/0 ) then accept;

  reject;
}
# Additional IPv4 routing table named "world"; it is connected to master4
# by the pipe protocol main_pipe_world.
ipv4 table world;
# Direct protocol with a single IPv4 channel using default channel options.
protocol direct {
  ipv4;
}
# Kernel protocol instance "kernel_main": exports routes to the kernel
# and imports nothing back from it.
protocol kernel kernel_main {
  ipv4 {
    import none;
    export all;       # the default would be "export none"
  };
  scan time 20;       # scan the kernel routing table every 20 seconds
  persist;            # leave routes in the kernel when BIRD shuts down
}
# Device protocol; the scan interval is set explicitly below.
protocol device {
scan time 2;# Scan interfaces every 2 seconds
}
# Static protocol "static_bgp_w": defines our own (redacted) networks as
# unreachable routes. rt_export() matches routes from this protocol by name.
protocol static static_bgp_w {
  ipv4;
  route x.x.x.0/22 unreachable;
  route y.y.y.0/24 unreachable;
}
# Static protocol "static_ospf": defines an unreachable default route.
# export_OSPF_world accepts 0.0.0.0/0, so this is presumably the default
# route that gets announced into OSPF - confirm against the ospf protocol.
protocol static static_ospf {
ipv4 {
preference 10; # Set the preference of this protocol's routes to 10 (overrides the protocol default)
};
route 0.0.0.0/0 unreachable;
}
# Pipe between the primary table master4 and the peer table "world".
#   import: routes coming from "world" into master4 get krt_metric = 100
#           and are all accepted.
#   export: only device (directly connected) routes go from master4
#           to "world".
protocol pipe main_pipe_world {
  table master4;
  peer table world;

  import filter {
    krt_metric = 100;
    accept;
  };

  export filter {
    if ( source = RTS_DEVICE ) then accept;
    reject;
  };
}
protocol ospf ospf_world {
ipv4 {
import all;
export filter export_OSPF_world;