On Tue, Mar 28, 2023 at 11:48:07AM +0200, Claudio Jeker wrote:
> When an RTR session updates the data it happens between CACHE_RESPONSE and
> END_OF_DATA PDUs. When an END_OF_DATA PDU is received the various sources
> are merged into one table and sent to the RDE.
> Now since bgpd supports multiple RTR servers it is possible that two
> servers run updates roughly at the same time. In that case the first
> END_OF_DATA PDU results in a table recalculation of intermediate data (at
> least from the point of the other session).
> To prevent this from happening introduce a semaphore that prevents the
> rtr_recalc() from happening if there are other active RTR sessions.
> On top of this add a 60sec timeout that prevents RTR sessions from hogging
> the semaphore for too long.
> 
> The static table (roa-set, aspa-set) are also handled by the rtr porcess
> but config reloads happen work on a 2nd table which is switched into place
> at the end of the reload process so there is never a case were intermediate
> data is visible to rtr_recalc(). Therefore there is no need to use the
> semaphore there.

This also makes complete sense. Can't spot anything wrong with the
logic.

ok tb

> 
> -- 
> :wq Claudio
> 
> Index: bgpd.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> retrieving revision 1.465
> diff -u -p -r1.465 bgpd.h
> --- bgpd.h    13 Mar 2023 16:52:41 -0000      1.465
> +++ bgpd.h    20 Mar 2023 09:23:47 -0000
> @@ -1638,6 +1638,7 @@ static const char * const timernames[] =
>       "RTR RefreshTimer",
>       "RTR RetryTimer",
>       "RTR ExpireTimer",
> +     "RTR ActiveTimer",
>       ""
>  };
>  
> Index: rtr.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rtr.c,v
> retrieving revision 1.12
> diff -u -p -r1.12 rtr.c
> --- rtr.c     9 Mar 2023 17:21:21 -0000       1.12
> +++ rtr.c     20 Mar 2023 09:41:22 -0000
> @@ -40,6 +40,7 @@ static struct imsgbuf               *ibuf_main;
>  static struct imsgbuf                *ibuf_rde;
>  static struct bgpd_config    *conf, *nconf;
>  static struct timer_head      expire_timer;
> +static int                    rtr_recalc_semaphore;
>  
>  static void
>  rtr_sighdlr(int sig)
> @@ -58,6 +59,20 @@ rtr_sighdlr(int sig)
>  
>  #define EXPIRE_TIMEOUT       300
>  
> +void
> +rtr_sem_acquire(int cnt)
> +{
> +     rtr_recalc_semaphore += cnt;
> +}
> +
> +void
> +rtr_sem_release(int cnt)
> +{
> +     rtr_recalc_semaphore -= cnt;
> +     if (rtr_recalc_semaphore < 0)
> +             fatalx("rtr recalc semaphore underflow");
> +}
> +
>  /*
>   * Every EXPIRE_TIMEOUT seconds traverse the static roa-set table and expire
>   * all elements where the expires timestamp is smaller or equal to now.
> @@ -541,6 +556,9 @@ rtr_recalc(void)
>       struct roa *roa, *nr;
>       struct aspa_set *aspa;
>       struct aspa_prep ap = { 0 };
> +
> +     if (rtr_recalc_semaphore > 0)
> +             return;
>  
>       RB_INIT(&rt);
>       RB_INIT(&at);
> Index: rtr_proto.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 rtr_proto.c
> --- rtr_proto.c       17 Mar 2023 11:14:10 -0000      1.15
> +++ rtr_proto.c       20 Mar 2023 09:46:54 -0000
> @@ -40,6 +40,7 @@ struct rtr_header {
>  #define RTR_DEFAULT_REFRESH  3600
>  #define RTR_DEFAULT_RETRY    600
>  #define RTR_DEFAULT_EXPIRE   7200
> +#define RTR_DEFAULT_ACTIVE   60
>  
>  enum rtr_pdu_type {
>       SERIAL_NOTIFY = 0,
> @@ -99,6 +100,7 @@ enum rtr_event {
>       RTR_EVNT_TIMER_REFRESH,
>       RTR_EVNT_TIMER_RETRY,
>       RTR_EVNT_TIMER_EXPIRE,
> +     RTR_EVNT_TIMER_ACTIVE,
>       RTR_EVNT_SEND_ERROR,
>       RTR_EVNT_SERIAL_NOTIFY,
>       RTR_EVNT_CACHE_RESPONSE,
> @@ -116,6 +118,7 @@ static const char *rtr_eventnames[] = {
>       "refresh timer expired",
>       "retry timer expired",
>       "expire timer expired",
> +     "activity timer expired",
>       "sent error",
>       "serial notify received",
>       "cache response received",
> @@ -157,8 +160,10 @@ struct rtr_session {
>       uint32_t                        refresh;
>       uint32_t                        retry;
>       uint32_t                        expire;
> +     uint32_t                        active;
>       int                             session_id;
>       int                             fd;
> +     int                             active_lock;
>       enum rtr_state                  state;
>       enum reconf_action              reconf_action;
>       enum rtr_error                  last_sent_error;
> @@ -1033,18 +1038,30 @@ rtr_fsm(struct rtr_session *rs, enum rtr
>               rtr_reset_cache(rs);
>               rtr_recalc();
>               break;
> +     case RTR_EVNT_TIMER_ACTIVE:
> +             log_warnx("rtr %s: activity timer fired", log_rtr(rs));
> +             rtr_sem_release(rs->active_lock);
> +             rtr_recalc();
> +             rs->active_lock = 0;
> +             break;
>       case RTR_EVNT_CACHE_RESPONSE:
>               rs->state = RTR_STATE_ACTIVE;
>               timer_stop(&rs->timers, Timer_Rtr_Refresh);
>               timer_stop(&rs->timers, Timer_Rtr_Retry);
> -             /* XXX start timer to limit active time */
> +             timer_set(&rs->timers, Timer_Rtr_Active, rs->active);
> +             /* prevent rtr_recalc from running while active */
> +             rs->active_lock = 1;
> +             rtr_sem_acquire(rs->active_lock);
>               break;
>       case RTR_EVNT_END_OF_DATA:
>               /* start refresh and expire timers */
>               timer_set(&rs->timers, Timer_Rtr_Refresh, rs->refresh);
>               timer_set(&rs->timers, Timer_Rtr_Expire, rs->expire);
> +             timer_stop(&rs->timers, Timer_Rtr_Active);
>               rs->state = RTR_STATE_IDLE;
> +             rtr_sem_release(rs->active_lock);
>               rtr_recalc();
> +             rs->active_lock = 0;
>               break;
>       case RTR_EVNT_CACHE_RESET:
>               rtr_reset_cache(rs);
> @@ -1164,6 +1181,9 @@ rtr_check_events(struct pollfd *pfds, si
>                       case Timer_Rtr_Expire:
>                               rtr_fsm(rs, RTR_EVNT_TIMER_EXPIRE);
>                               break;
> +                     case Timer_Rtr_Active:
> +                             rtr_fsm(rs, RTR_EVNT_TIMER_ACTIVE);
> +                             break;
>                       default:
>                               fatalx("King Bula lost in time");
>                       }
> @@ -1237,6 +1257,7 @@ rtr_new(uint32_t id, char *descr)
>       rs->refresh = RTR_DEFAULT_REFRESH;
>       rs->retry = RTR_DEFAULT_RETRY;
>       rs->expire = RTR_DEFAULT_EXPIRE;
> +     rs->active = RTR_DEFAULT_ACTIVE;
>       rs->state = RTR_STATE_CLOSED;
>       rs->reconf_action = RECONF_REINIT;
>       rs->last_recv_error = NO_ERROR;
> Index: session.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.161
> diff -u -p -r1.161 session.h
> --- session.h 9 Mar 2023 17:21:21 -0000       1.161
> +++ session.h 20 Mar 2023 09:42:04 -0000
> @@ -201,6 +201,7 @@ enum Timer {
>       Timer_Rtr_Refresh,
>       Timer_Rtr_Retry,
>       Timer_Rtr_Expire,
> +     Timer_Rtr_Active,
>       Timer_Max
>  };
>  
> @@ -334,6 +335,8 @@ void                       rtr_shutdown(void);
>  void                  rtr_show(struct rtr_session *, pid_t);
>  
>  /* rtr.c */
> +void rtr_sem_acquire(int);
> +void rtr_sem_release(int);
>  void rtr_roa_insert(struct roa_tree *, struct roa *);
>  void rtr_aspa_insert(struct aspa_tree *, struct aspa_set *);
>  void rtr_main(int, int);
> 

Reply via email to