When an RTR session updates the data it happens between CACHE_RESPONSE and
END_OF_DATA PDUs. When an END_OF_DATA PDU is received the various sources
are merged into one table and sent to the RDE.
Now since bgpd supports multiple RTR servers it is possible that two
servers run updates roughly at the same time. In that case the first
END_OF_DATA PDU results in a table recalculation on intermediate data (at
least from the point of view of the other session).
To prevent this from happening introduce a semaphore that prevents the
rtr_recalc() from happening if there are other active RTR sessions.
On top of this add a 60sec timeout that prevents RTR sessions from hogging
the semaphore for too long.

The static tables (roa-set, aspa-set) are also handled by the rtr process,
but config reloads work on a 2nd table which is switched into place
at the end of the reload process, so there is never a case where intermediate
data is visible to rtr_recalc(). Therefore there is no need to use the
semaphore there.

-- 
:wq Claudio

Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.465
diff -u -p -r1.465 bgpd.h
--- bgpd.h      13 Mar 2023 16:52:41 -0000      1.465
+++ bgpd.h      20 Mar 2023 09:23:47 -0000
@@ -1638,6 +1638,7 @@ static const char * const timernames[] =
        "RTR RefreshTimer",
        "RTR RetryTimer",
        "RTR ExpireTimer",
+       "RTR ActiveTimer",
        ""
 };
 
Index: rtr.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rtr.c,v
retrieving revision 1.12
diff -u -p -r1.12 rtr.c
--- rtr.c       9 Mar 2023 17:21:21 -0000       1.12
+++ rtr.c       20 Mar 2023 09:41:22 -0000
@@ -40,6 +40,7 @@ static struct imsgbuf         *ibuf_main;
 static struct imsgbuf          *ibuf_rde;
 static struct bgpd_config      *conf, *nconf;
 static struct timer_head        expire_timer;
+static int                      rtr_recalc_semaphore;
 
 static void
 rtr_sighdlr(int sig)
@@ -58,6 +59,20 @@ rtr_sighdlr(int sig)
 
 #define EXPIRE_TIMEOUT 300
 
+void
+rtr_sem_acquire(int cnt)
+{
+       rtr_recalc_semaphore += cnt;
+}
+
+void
+rtr_sem_release(int cnt)
+{
+       rtr_recalc_semaphore -= cnt;
+       if (rtr_recalc_semaphore < 0)
+               fatalx("rtr recalc semaphore underflow");
+}
+
 /*
  * Every EXPIRE_TIMEOUT seconds traverse the static roa-set table and expire
  * all elements where the expires timestamp is smaller or equal to now.
@@ -541,6 +556,9 @@ rtr_recalc(void)
        struct roa *roa, *nr;
        struct aspa_set *aspa;
        struct aspa_prep ap = { 0 };
+
+       if (rtr_recalc_semaphore > 0)
+               return;
 
        RB_INIT(&rt);
        RB_INIT(&at);
Index: rtr_proto.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
retrieving revision 1.15
diff -u -p -r1.15 rtr_proto.c
--- rtr_proto.c 17 Mar 2023 11:14:10 -0000      1.15
+++ rtr_proto.c 20 Mar 2023 09:46:54 -0000
@@ -40,6 +40,7 @@ struct rtr_header {
 #define RTR_DEFAULT_REFRESH    3600
 #define RTR_DEFAULT_RETRY      600
 #define RTR_DEFAULT_EXPIRE     7200
+#define RTR_DEFAULT_ACTIVE     60
 
 enum rtr_pdu_type {
        SERIAL_NOTIFY = 0,
@@ -99,6 +100,7 @@ enum rtr_event {
        RTR_EVNT_TIMER_REFRESH,
        RTR_EVNT_TIMER_RETRY,
        RTR_EVNT_TIMER_EXPIRE,
+       RTR_EVNT_TIMER_ACTIVE,
        RTR_EVNT_SEND_ERROR,
        RTR_EVNT_SERIAL_NOTIFY,
        RTR_EVNT_CACHE_RESPONSE,
@@ -116,6 +118,7 @@ static const char *rtr_eventnames[] = {
        "refresh timer expired",
        "retry timer expired",
        "expire timer expired",
+       "activity timer expired",
        "sent error",
        "serial notify received",
        "cache response received",
@@ -157,8 +160,10 @@ struct rtr_session {
        uint32_t                        refresh;
        uint32_t                        retry;
        uint32_t                        expire;
+       uint32_t                        active;
        int                             session_id;
        int                             fd;
+       int                             active_lock;
        enum rtr_state                  state;
        enum reconf_action              reconf_action;
        enum rtr_error                  last_sent_error;
@@ -1033,18 +1038,30 @@ rtr_fsm(struct rtr_session *rs, enum rtr
                rtr_reset_cache(rs);
                rtr_recalc();
                break;
+       case RTR_EVNT_TIMER_ACTIVE:
+               log_warnx("rtr %s: activity timer fired", log_rtr(rs));
+               rtr_sem_release(rs->active_lock);
+               rtr_recalc();
+               rs->active_lock = 0;
+               break;
        case RTR_EVNT_CACHE_RESPONSE:
                rs->state = RTR_STATE_ACTIVE;
                timer_stop(&rs->timers, Timer_Rtr_Refresh);
                timer_stop(&rs->timers, Timer_Rtr_Retry);
-               /* XXX start timer to limit active time */
+               timer_set(&rs->timers, Timer_Rtr_Active, rs->active);
+               /* prevent rtr_recalc from running while active */
+               rs->active_lock = 1;
+               rtr_sem_acquire(rs->active_lock);
                break;
        case RTR_EVNT_END_OF_DATA:
                /* start refresh and expire timers */
                timer_set(&rs->timers, Timer_Rtr_Refresh, rs->refresh);
                timer_set(&rs->timers, Timer_Rtr_Expire, rs->expire);
+               timer_stop(&rs->timers, Timer_Rtr_Active);
                rs->state = RTR_STATE_IDLE;
+               rtr_sem_release(rs->active_lock);
                rtr_recalc();
+               rs->active_lock = 0;
                break;
        case RTR_EVNT_CACHE_RESET:
                rtr_reset_cache(rs);
@@ -1164,6 +1181,9 @@ rtr_check_events(struct pollfd *pfds, si
                        case Timer_Rtr_Expire:
                                rtr_fsm(rs, RTR_EVNT_TIMER_EXPIRE);
                                break;
+                       case Timer_Rtr_Active:
+                               rtr_fsm(rs, RTR_EVNT_TIMER_ACTIVE);
+                               break;
                        default:
                                fatalx("King Bula lost in time");
                        }
@@ -1237,6 +1257,7 @@ rtr_new(uint32_t id, char *descr)
        rs->refresh = RTR_DEFAULT_REFRESH;
        rs->retry = RTR_DEFAULT_RETRY;
        rs->expire = RTR_DEFAULT_EXPIRE;
+       rs->active = RTR_DEFAULT_ACTIVE;
        rs->state = RTR_STATE_CLOSED;
        rs->reconf_action = RECONF_REINIT;
        rs->last_recv_error = NO_ERROR;
Index: session.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.161
diff -u -p -r1.161 session.h
--- session.h   9 Mar 2023 17:21:21 -0000       1.161
+++ session.h   20 Mar 2023 09:42:04 -0000
@@ -201,6 +201,7 @@ enum Timer {
        Timer_Rtr_Refresh,
        Timer_Rtr_Retry,
        Timer_Rtr_Expire,
+       Timer_Rtr_Active,
        Timer_Max
 };
 
@@ -334,6 +335,8 @@ void                         rtr_shutdown(void);
 void                    rtr_show(struct rtr_session *, pid_t);
 
 /* rtr.c */
+void   rtr_sem_acquire(int);
+void   rtr_sem_release(int);
 void   rtr_roa_insert(struct roa_tree *, struct roa *);
 void   rtr_aspa_insert(struct aspa_tree *, struct aspa_set *);
 void   rtr_main(int, int);

Reply via email to