On Tue, Mar 28, 2023 at 11:48:07AM +0200, Claudio Jeker wrote: > When an RTR session updates the data it happens between CACHE_RESPONSE and > END_OF_DATA PDUs. When an END_OF_DATA PDU is received the various sources > are merged into one table and sent to the RDE. > Now since bgpd supports multiple RTR servers it is possible that two > servers run updates roughly at the same time. In that case the first > END_OF_DATA PDU results in a table recalculation of intermediate data (at > least from the point of the other session). > To prevent this from happening introduce a semaphore that prevents the > rtr_recalc() from happening if there are other active RTR sessions. > On top of this add a 60sec timeout that prevents RTR sessions from hogging > the semaphore for too long. > > The static table (roa-set, aspa-set) are also handled by the rtr porcess > but config reloads happen work on a 2nd table which is switched into place > at the end of the reload process so there is never a case were intermediate > data is visible to rtr_recalc(). Therefore there is no need to use the > semaphore there.
This also makes complete sense. Can't spot anything wrong with the logic. ok tb > > -- > :wq Claudio > > Index: bgpd.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v > retrieving revision 1.465 > diff -u -p -r1.465 bgpd.h > --- bgpd.h 13 Mar 2023 16:52:41 -0000 1.465 > +++ bgpd.h 20 Mar 2023 09:23:47 -0000 > @@ -1638,6 +1638,7 @@ static const char * const timernames[] = > "RTR RefreshTimer", > "RTR RetryTimer", > "RTR ExpireTimer", > + "RTR ActiveTimer", > "" > }; > > Index: rtr.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rtr.c,v > retrieving revision 1.12 > diff -u -p -r1.12 rtr.c > --- rtr.c 9 Mar 2023 17:21:21 -0000 1.12 > +++ rtr.c 20 Mar 2023 09:41:22 -0000 > @@ -40,6 +40,7 @@ static struct imsgbuf *ibuf_main; > static struct imsgbuf *ibuf_rde; > static struct bgpd_config *conf, *nconf; > static struct timer_head expire_timer; > +static int rtr_recalc_semaphore; > > static void > rtr_sighdlr(int sig) > @@ -58,6 +59,20 @@ rtr_sighdlr(int sig) > > #define EXPIRE_TIMEOUT 300 > > +void > +rtr_sem_acquire(int cnt) > +{ > + rtr_recalc_semaphore += cnt; > +} > + > +void > +rtr_sem_release(int cnt) > +{ > + rtr_recalc_semaphore -= cnt; > + if (rtr_recalc_semaphore < 0) > + fatalx("rtr recalc semaphore underflow"); > +} > + > /* > * Every EXPIRE_TIMEOUT seconds traverse the static roa-set table and expire > * all elements where the expires timestamp is smaller or equal to now. > @@ -541,6 +556,9 @@ rtr_recalc(void) > struct roa *roa, *nr; > struct aspa_set *aspa; > struct aspa_prep ap = { 0 }; > + > + if (rtr_recalc_semaphore > 0) > + return; > > RB_INIT(&rt); > RB_INIT(&at); > Index: rtr_proto.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v > retrieving revision 1.15 > diff -u -p -r1.15 rtr_proto.c > --- rtr_proto.c 17 Mar 2023 11:14:10 -0000 1.15 > +++ rtr_proto.c 20 Mar 2023 09:46:54 -0000 > @@ -40,6 +40,7 @@ struct rtr_header { > #define RTR_DEFAULT_REFRESH 3600 > #define RTR_DEFAULT_RETRY 600 > #define RTR_DEFAULT_EXPIRE 7200 > +#define RTR_DEFAULT_ACTIVE 60 > > enum rtr_pdu_type { > SERIAL_NOTIFY = 0, > @@ -99,6 +100,7 @@ enum rtr_event { > RTR_EVNT_TIMER_REFRESH, > RTR_EVNT_TIMER_RETRY, > RTR_EVNT_TIMER_EXPIRE, > + RTR_EVNT_TIMER_ACTIVE, > RTR_EVNT_SEND_ERROR, > RTR_EVNT_SERIAL_NOTIFY, > RTR_EVNT_CACHE_RESPONSE, > @@ -116,6 +118,7 @@ static const char *rtr_eventnames[] = { > "refresh timer expired", > "retry timer expired", > "expire timer expired", > + "activity timer expired", > "sent error", > "serial notify received", > "cache response received", > @@ -157,8 +160,10 @@ struct rtr_session { > uint32_t refresh; > uint32_t retry; > uint32_t expire; > + uint32_t active; > int session_id; > int fd; > + int active_lock; > enum rtr_state state; > enum reconf_action reconf_action; > enum rtr_error last_sent_error; > @@ -1033,18 +1038,30 @@ rtr_fsm(struct rtr_session *rs, enum rtr > rtr_reset_cache(rs); > rtr_recalc(); > break; > + case RTR_EVNT_TIMER_ACTIVE: > + log_warnx("rtr %s: activity timer fired", log_rtr(rs)); > + rtr_sem_release(rs->active_lock); > + rtr_recalc(); > + rs->active_lock = 0; > + break; > case RTR_EVNT_CACHE_RESPONSE: > rs->state = RTR_STATE_ACTIVE; > timer_stop(&rs->timers, Timer_Rtr_Refresh); > timer_stop(&rs->timers, Timer_Rtr_Retry); > - /* XXX start timer to limit active time */ > + timer_set(&rs->timers, Timer_Rtr_Active, rs->active); > + /* prevent rtr_recalc from running while active */ > + rs->active_lock = 1; > + rtr_sem_acquire(rs->active_lock); > break; > case RTR_EVNT_END_OF_DATA: > /* start refresh and expire timers */ > timer_set(&rs->timers, Timer_Rtr_Refresh, rs->refresh); > timer_set(&rs->timers, Timer_Rtr_Expire, rs->expire); > + timer_stop(&rs->timers, Timer_Rtr_Active); > rs->state = RTR_STATE_IDLE; > + rtr_sem_release(rs->active_lock); > rtr_recalc(); > + rs->active_lock = 0; > break; > case RTR_EVNT_CACHE_RESET: > rtr_reset_cache(rs); > @@ -1164,6 +1181,9 @@ rtr_check_events(struct pollfd *pfds, si > case Timer_Rtr_Expire: > rtr_fsm(rs, RTR_EVNT_TIMER_EXPIRE); > break; > + case Timer_Rtr_Active: > + rtr_fsm(rs, RTR_EVNT_TIMER_ACTIVE); > + break; > default: > fatalx("King Bula lost in time"); > } > @@ -1237,6 +1257,7 @@ rtr_new(uint32_t id, char *descr) > rs->refresh = RTR_DEFAULT_REFRESH; > rs->retry = RTR_DEFAULT_RETRY; > rs->expire = RTR_DEFAULT_EXPIRE; > + rs->active = RTR_DEFAULT_ACTIVE; > rs->state = RTR_STATE_CLOSED; > rs->reconf_action = RECONF_REINIT; > rs->last_recv_error = NO_ERROR; > Index: session.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > retrieving revision 1.161 > diff -u -p -r1.161 session.h > --- session.h 9 Mar 2023 17:21:21 -0000 1.161 > +++ session.h 20 Mar 2023 09:42:04 -0000 > @@ -201,6 +201,7 @@ enum Timer { > Timer_Rtr_Refresh, > Timer_Rtr_Retry, > Timer_Rtr_Expire, > + Timer_Rtr_Active, > Timer_Max > }; > > @@ -334,6 +335,8 @@ void rtr_shutdown(void); > void rtr_show(struct rtr_session *, pid_t); > > /* rtr.c */ > +void rtr_sem_acquire(int); > +void rtr_sem_release(int); > void rtr_roa_insert(struct roa_tree *, struct roa *); > void rtr_aspa_insert(struct aspa_tree *, struct aspa_set *); > void rtr_main(int, int); >