When an RTR session updates its data, the update happens between the CACHE_RESPONSE and END_OF_DATA PDUs. When an END_OF_DATA PDU is received, the various sources are merged into one table and sent to the RDE. Now, since bgpd supports multiple RTR servers, it is possible that two servers run updates at roughly the same time. In that case the first END_OF_DATA PDU results in a table recalculation based on intermediate data (at least from the point of view of the other session). To prevent this from happening, introduce a semaphore that prevents rtr_recalc() from running if there are other active RTR sessions. On top of this, add a 60-second timeout that prevents RTR sessions from hogging the semaphore for too long.
The static tables (roa-set, aspa-set) are also handled by the rtr process, but config reloads work on a second table which is switched into place at the end of the reload process, so there is never a case where intermediate data is visible to rtr_recalc(). Therefore there is no need to use the semaphore there. -- :wq Claudio Index: bgpd.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v retrieving revision 1.465 diff -u -p -r1.465 bgpd.h --- bgpd.h 13 Mar 2023 16:52:41 -0000 1.465 +++ bgpd.h 20 Mar 2023 09:23:47 -0000 @@ -1638,6 +1638,7 @@ static const char * const timernames[] = "RTR RefreshTimer", "RTR RetryTimer", "RTR ExpireTimer", + "RTR ActiveTimer", "" }; Index: rtr.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rtr.c,v retrieving revision 1.12 diff -u -p -r1.12 rtr.c --- rtr.c 9 Mar 2023 17:21:21 -0000 1.12 +++ rtr.c 20 Mar 2023 09:41:22 -0000 @@ -40,6 +40,7 @@ static struct imsgbuf *ibuf_main; static struct imsgbuf *ibuf_rde; static struct bgpd_config *conf, *nconf; static struct timer_head expire_timer; +static int rtr_recalc_semaphore; static void rtr_sighdlr(int sig) @@ -58,6 +59,20 @@ rtr_sighdlr(int sig) #define EXPIRE_TIMEOUT 300 +void +rtr_sem_acquire(int cnt) +{ + rtr_recalc_semaphore += cnt; +} + +void +rtr_sem_release(int cnt) +{ + rtr_recalc_semaphore -= cnt; + if (rtr_recalc_semaphore < 0) + fatalx("rtr recalc semaphore underflow"); +} + /* * Every EXPIRE_TIMEOUT seconds traverse the static roa-set table and expire * all elements where the expires timestamp is smaller or equal to now. 
@@ -541,6 +556,9 @@ rtr_recalc(void) struct roa *roa, *nr; struct aspa_set *aspa; struct aspa_prep ap = { 0 }; + + if (rtr_recalc_semaphore > 0) + return; RB_INIT(&rt); RB_INIT(&at); Index: rtr_proto.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v retrieving revision 1.15 diff -u -p -r1.15 rtr_proto.c --- rtr_proto.c 17 Mar 2023 11:14:10 -0000 1.15 +++ rtr_proto.c 20 Mar 2023 09:46:54 -0000 @@ -40,6 +40,7 @@ struct rtr_header { #define RTR_DEFAULT_REFRESH 3600 #define RTR_DEFAULT_RETRY 600 #define RTR_DEFAULT_EXPIRE 7200 +#define RTR_DEFAULT_ACTIVE 60 enum rtr_pdu_type { SERIAL_NOTIFY = 0, @@ -99,6 +100,7 @@ enum rtr_event { RTR_EVNT_TIMER_REFRESH, RTR_EVNT_TIMER_RETRY, RTR_EVNT_TIMER_EXPIRE, + RTR_EVNT_TIMER_ACTIVE, RTR_EVNT_SEND_ERROR, RTR_EVNT_SERIAL_NOTIFY, RTR_EVNT_CACHE_RESPONSE, @@ -116,6 +118,7 @@ static const char *rtr_eventnames[] = { "refresh timer expired", "retry timer expired", "expire timer expired", + "activity timer expired", "sent error", "serial notify received", "cache response received", @@ -157,8 +160,10 @@ struct rtr_session { uint32_t refresh; uint32_t retry; uint32_t expire; + uint32_t active; int session_id; int fd; + int active_lock; enum rtr_state state; enum reconf_action reconf_action; enum rtr_error last_sent_error; @@ -1033,18 +1038,30 @@ rtr_fsm(struct rtr_session *rs, enum rtr rtr_reset_cache(rs); rtr_recalc(); break; + case RTR_EVNT_TIMER_ACTIVE: + log_warnx("rtr %s: activity timer fired", log_rtr(rs)); + rtr_sem_release(rs->active_lock); + rtr_recalc(); + rs->active_lock = 0; + break; case RTR_EVNT_CACHE_RESPONSE: rs->state = RTR_STATE_ACTIVE; timer_stop(&rs->timers, Timer_Rtr_Refresh); timer_stop(&rs->timers, Timer_Rtr_Retry); - /* XXX start timer to limit active time */ + timer_set(&rs->timers, Timer_Rtr_Active, rs->active); + /* prevent rtr_recalc from running while active */ + rs->active_lock = 1; + rtr_sem_acquire(rs->active_lock); break; case 
RTR_EVNT_END_OF_DATA: /* start refresh and expire timers */ timer_set(&rs->timers, Timer_Rtr_Refresh, rs->refresh); timer_set(&rs->timers, Timer_Rtr_Expire, rs->expire); + timer_stop(&rs->timers, Timer_Rtr_Active); rs->state = RTR_STATE_IDLE; + rtr_sem_release(rs->active_lock); rtr_recalc(); + rs->active_lock = 0; break; case RTR_EVNT_CACHE_RESET: rtr_reset_cache(rs); @@ -1164,6 +1181,9 @@ rtr_check_events(struct pollfd *pfds, si case Timer_Rtr_Expire: rtr_fsm(rs, RTR_EVNT_TIMER_EXPIRE); break; + case Timer_Rtr_Active: + rtr_fsm(rs, RTR_EVNT_TIMER_ACTIVE); + break; default: fatalx("King Bula lost in time"); } @@ -1237,6 +1257,7 @@ rtr_new(uint32_t id, char *descr) rs->refresh = RTR_DEFAULT_REFRESH; rs->retry = RTR_DEFAULT_RETRY; rs->expire = RTR_DEFAULT_EXPIRE; + rs->active = RTR_DEFAULT_ACTIVE; rs->state = RTR_STATE_CLOSED; rs->reconf_action = RECONF_REINIT; rs->last_recv_error = NO_ERROR; Index: session.h =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.161 diff -u -p -r1.161 session.h --- session.h 9 Mar 2023 17:21:21 -0000 1.161 +++ session.h 20 Mar 2023 09:42:04 -0000 @@ -201,6 +201,7 @@ enum Timer { Timer_Rtr_Refresh, Timer_Rtr_Retry, Timer_Rtr_Expire, + Timer_Rtr_Active, Timer_Max }; @@ -334,6 +335,8 @@ void rtr_shutdown(void); void rtr_show(struct rtr_session *, pid_t); /* rtr.c */ +void rtr_sem_acquire(int); +void rtr_sem_release(int); void rtr_roa_insert(struct roa_tree *, struct roa *); void rtr_aspa_insert(struct aspa_tree *, struct aspa_set *); void rtr_main(int, int);