From: Vivek Venkatraman <[email protected]>

- Schedule write thread for advertisements and withdraws only if corresponding
  FIFOs are growing and/or upon work_queue getting fully processed.
- Set non-default yield time for the main work_queue, as the default value
  of 10ms results in yielding after processing very few nodes.
- Remove unnecessary scheduling of write thread when update packet is formed.
- If MRAI is 0, don't start a timer unnecessarily, directly schedule write
  thread.
- Some debugs.

Signed-off-by: Vivek Venkatraman <[email protected]>
---
 bgpd/bgp_advertise.c |  39 ++++++++++++++++---
 bgpd/bgp_advertise.h |   3 ++
 bgpd/bgp_attr.c      |   8 +++-
 bgpd/bgp_fsm.c       |  28 ++++++++++----
 bgpd/bgp_packet.c    | 104 ++++++++++++++++++++++++++++++++++++++++++++-------
 bgpd/bgp_packet.h    |   9 +++++
 bgpd/bgp_route.c     |  34 ++++++++++++++++-
 bgpd/bgp_route.h     |   2 +-
 bgpd/bgpd.c          |   2 +
 bgpd/bgpd.h          |   7 ++++
 10 files changed, 207 insertions(+), 29 deletions(-)

diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c
index afcbfde..ea1e283 100644
--- a/bgpd/bgp_advertise.c
+++ b/bgpd/bgp_advertise.c
@@ -32,6 +32,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, 
Boston, MA
 #include "bgpd/bgp_route.h"
 #include "bgpd/bgp_advertise.h"
 #include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_debug.h"
 #include "bgpd/bgp_aspath.h"
 #include "bgpd/bgp_packet.h"
 #include "bgpd/bgp_fsm.h"
@@ -268,10 +269,24 @@ bgp_adj_out_set (struct bgp_node *rn, struct peer *peer, 
struct prefix *p,
   /* Add new advertisement to advertisement attribute list. */
   bgp_advertise_add (adv->baa, adv);
 
-  if (FIFO_EMPTY(&peer->sync[afi][safi]->update))
-    bgp_adjust_routeadv(peer);
-
   BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo);
+
+  /*
+   * Schedule write thread (by triggering adjustment of MRAI timer) only if
+   * update FIFO has grown. Otherwise, it will be done upon the work queue
+   * being fully processed. Only adjust timer if needed.
+   */
+  if (!BGP_ROUTE_ADV_HOLD(peer->bgp) &&
+      (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->update) >=
+       peer->bgp->adv_quanta))
+    {
+      if (!peer->radv_adjusted)
+        {
+          if (BGP_DEBUG (events, EVENTS))
+            zlog_debug("%s scheduling MRAI timer after adj_out_set", 
peer->host);
+          bgp_adjust_routeadv(peer);
+        }
+    }
 }
 
 void
@@ -307,8 +322,22 @@ bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, 
struct prefix *p,
       /* Add to synchronization entry for withdraw announcement.  */
       BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->withdraw, &adv->fifo);
 
-      /* Schedule packet write. */
-      BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+      /*
+       * Schedule write thread only if withdraw FIFO has grown. Otherwise,
+       * it will be done upon the work queue being fully processed.
+       */
+      if (!BGP_ROUTE_ADV_HOLD(peer->bgp) &&
+         (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->withdraw) >=
+          peer->bgp->wd_quanta))
+        {
+          if (!peer->t_write)
+            {
+              if (BGP_DEBUG (events, EVENTS))
+                zlog_debug("%s scheduling write thread after adj_out_unset",
+                           peer->host);
+              BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+            }
+        }
     }
   else
     {
diff --git a/bgpd/bgp_advertise.h b/bgpd/bgp_advertise.h
index 71d4d41..1e9d581 100644
--- a/bgpd/bgp_advertise.h
+++ b/bgpd/bgp_advertise.h
@@ -148,6 +148,9 @@ struct bgp_synchronize
     (F)->count = 0;                            \
   } while (0)
 
+#define BGP_ADV_FIFO_COUNT(F) \
+  (F)->count
+
 /* Prototypes.  */
 extern void bgp_adj_out_set (struct bgp_node *, struct peer *, struct prefix *,
                      struct attr *, afi_t, safi_t, struct bgp_info *);
diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c
index f12f757..189af4b 100644
--- a/bgpd/bgp_attr.c
+++ b/bgpd/bgp_attr.c
@@ -1545,6 +1545,7 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args,
   bgp_size_t nlri_len;
   size_t start;
   int ret;
+  int num_mp_pfx = 0;
   struct stream *s;
   struct peer *const peer = args->peer;  
   struct attr *const attr = args->attr;
@@ -1647,7 +1648,8 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args,
  
   if (safi != SAFI_MPLS_LABELED_VPN)
     {
-      ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len);
+      ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len,
+                                   &num_mp_pfx);
       if (ret < 0) 
         {
           zlog_info ("%s: (%s) NLRI doesn't pass sanity check",
@@ -1679,6 +1681,7 @@ bgp_mp_unreach_parse (struct bgp_attr_parser_args *args,
   safi_t safi;
   u_int16_t withdraw_len;
   int ret;
+  int num_mp_pfx = 0;
   struct peer *const peer = args->peer;  
   struct attr *const attr = args->attr;
   const bgp_size_t length = args->length;
@@ -1696,7 +1699,8 @@ bgp_mp_unreach_parse (struct bgp_attr_parser_args *args,
 
   if (safi != SAFI_MPLS_LABELED_VPN)
     {
-      ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len);
+      ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len,
+                                   &num_mp_pfx);
       if (ret < 0)
        return BGP_ATTR_PARSE_ERROR_NOTIFYPLS;
     }
diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c
index e190ce1..4344cb2 100644
--- a/bgpd/bgp_fsm.c
+++ b/bgpd/bgp_fsm.c
@@ -409,6 +409,7 @@ bgp_routeadv_timer (struct thread *thread)
 
   peer = THREAD_ARG (thread);
   peer->t_routeadv = NULL;
+  peer->radv_adjusted = 0;
 
   if (BGP_DEBUG (fsm, FSM))
     zlog (peer->log, LOG_DEBUG,
@@ -419,14 +420,9 @@ bgp_routeadv_timer (struct thread *thread)
 
   BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
 
-  /*
-   * If there is no UPDATE to send, don't start the timer. We will start
-   * it when the queues go non-empty.
+  /* MRAI timer is no longer restarted here, it would be done
+   * when the FIFO is built.
    */
-  if (bgp_routeq_empty(peer))
-    return 0;
-
-  BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, peer->v_routeadv);
 
   return 0;
 }
@@ -621,6 +617,24 @@ bgp_adjust_routeadv (struct peer *peer)
   double diff;
   unsigned long remain;
 
+  /* Bypass checks for special case of MRAI being 0 */
+  if (peer->v_routeadv == 0)
+    {
+      /* Stop existing timer, just in case it is running for a different
+       * duration and schedule write thread immediately.
+       */
+      if (peer->t_routeadv)
+        BGP_TIMER_OFF(peer->t_routeadv);
+
+      peer->synctime = bgp_clock ();
+      BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+      return;
+    }
+
+  /* Mark that we've adjusted the timer */
+  peer->radv_adjusted = 1;
+
+
   /*
    * CASE I:
    * If the last update was written more than MRAI back, expire the timer
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index bc3cfe0..ff12709 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -158,6 +158,7 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t 
safi)
   int space_needed = 0;
   size_t mpattrlen_pos = 0;
   size_t mpattr_pos = 0;
+  int num_pfx_adv = 0;
 
   s = peer->work;
   stream_reset (s);
@@ -249,6 +250,8 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t 
safi)
                                                    adv->baa->attr);
          bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag);
        }
+      num_pfx_adv++;
+
       if (BGP_DEBUG (update, UPDATE_OUT))
         {
           char buf[INET6_BUFSIZ];
@@ -286,8 +289,12 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t 
safi)
       else
        packet = stream_dup (s);
       bgp_packet_set_size (packet);
+      if (BGP_DEBUG (update, UPDATE_OUT))
+        zlog(peer->log, LOG_DEBUG,
+             "%s form UPDATE (adv) total len %zd numPfx %d",
+             peer->host,
+             (stream_get_endp (s) - stream_get_getp (s)), num_pfx_adv);
       bgp_packet_add (peer, packet);
-      BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
       stream_reset (s);
       stream_reset (snlri);
       return packet;
@@ -361,6 +368,7 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t 
safi)
   u_char first_time = 1;
   int space_remaining = 0;
   int space_needed = 0;
+  int num_pfx_wd = 0;
 
   s = peer->work;
   stream_reset (s);
@@ -408,6 +416,7 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t 
safi)
 
          bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL);
        }
+      num_pfx_wd++;
 
       if (BGP_DEBUG (update, UPDATE_OUT))
         {
@@ -444,6 +453,11 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t 
safi)
          stream_putw_at (s, attrlen_pos, total_attr_len);
        }
       bgp_packet_set_size (s);
+      if (BGP_DEBUG (update, UPDATE_OUT))
+        zlog(peer->log, LOG_DEBUG,
+             "%s form UPDATE (wd) total len %zd numPfx %d",
+             peer->host,
+             (stream_get_endp (s) - stream_get_getp (s)), num_pfx_wd);
       packet = stream_dup (s);
       bgp_packet_add (peer, packet);
       stream_reset (s);
@@ -673,28 +687,82 @@ bgp_write_packet (struct peer *peer)
   return NULL;
 }
 
-/* Is there partially written packet or updates we can send right
-   now.  */
-static int
-bgp_write_proceed (struct peer *peer)
+/* Are there prefixes queued for being withdrawn? */
+int
+bgp_peer_wd_fifo_exists (struct peer *peer)
 {
   afi_t afi;
   safi_t safi;
-  struct bgp_advertise *adv;
-
-  if (stream_fifo_head (peer->obuf))
-    return 1;
 
   for (afi = AFI_IP; afi < AFI_MAX; afi++)
     for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
       if (FIFO_HEAD (&peer->sync[afi][safi]->withdraw))
        return 1;
 
+  return 0;
+}
+
+/* Are there prefixes queued for being advertised?
+ * Are they recent?
+ */
+int
+bgp_peer_adv_fifo_exists (struct peer *peer, int chk_recent)
+{
+  afi_t afi;
+  safi_t safi;
+  struct bgp_advertise *adv;
+
   for (afi = AFI_IP; afi < AFI_MAX; afi++)
     for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
       if ((adv = BGP_ADV_FIFO_HEAD (&peer->sync[afi][safi]->update)) != NULL)
-       if (adv->binfo->uptime < peer->synctime)
-         return 1;
+        {
+          if (!chk_recent)
+            return 1;
+          if (adv->binfo->uptime < peer->synctime)
+            return 1;
+        }
+
+  return 0;
+}
+
+/*
+ * Schedule updates for the peer, if needed.
+ */
+void
+bgp_peer_schedule_updates(struct peer *peer)
+{
+  /* If withdraw FIFO exists, immediately schedule write */
+  if (bgp_peer_wd_fifo_exists(peer) && !peer->t_write)
+    {
+      if (BGP_DEBUG (events, EVENTS))
+        zlog_debug("%s scheduling write thread", peer->host);
+      BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+    }
+
+  /* If update FIFO exists, fire MRAI timer */
+  if (bgp_peer_adv_fifo_exists(peer, 0) && !peer->radv_adjusted)
+    {
+      if (BGP_DEBUG (events, EVENTS))
+        zlog_debug("%s scheduling MRAI timer", peer->host);
+      bgp_adjust_routeadv(peer);
+    }
+}
+
+/* Is there partially written packet or updates we can send right
+   now.  */
+static int
+bgp_write_proceed (struct peer *peer)
+{
+  /* If queued packet exists, we should try to write it */
+  if (stream_fifo_head (peer->obuf))
+    return 1;
+
+  /* If there are prefixes to be withdrawn or to be advertised (and
+   * queued before last MRAI timer expiry), schedule write
+   */
+  if (bgp_peer_wd_fifo_exists(peer)
+   || bgp_peer_adv_fifo_exists(peer, 1))
+    return 1;
 
   return 0;
 }
@@ -1639,6 +1707,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
   struct bgp_nlri withdraw;
   struct bgp_nlri mp_update;
   struct bgp_nlri mp_withdraw;
+  int num_pfx_adv, num_pfx_wd;
 
   /* Status must be Established. */
   if (peer->status != Established) 
@@ -1657,6 +1726,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
   memset (&mp_update, 0, sizeof (struct bgp_nlri));
   memset (&mp_withdraw, 0, sizeof (struct bgp_nlri));
   attr.extra = &extra;
+  num_pfx_adv = num_pfx_wd = 0;
 
   s = peer->ibuf;
   end = stream_pnt (s) + size;
@@ -1692,7 +1762,8 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
   /* Unfeasible Route packet format check. */
   if (withdraw_len > 0)
     {
-      ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len);
+      ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len,
+                                   &num_pfx_wd);
       if (ret < 0)
        return -1;
 
@@ -1783,7 +1854,8 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
   if (update_len)
     {
       /* Check NLRI packet format and prefix length. */
-      ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len);
+      ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len,
+                                   &num_pfx_adv);
       if (ret < 0)
         {
           bgp_attr_unintern_sub (&attr);
@@ -1798,6 +1870,12 @@ bgp_update_receive (struct peer *peer, bgp_size_t size)
       stream_forward_getp (s, update_len);
     }
 
+  if (BGP_DEBUG (update, UPDATE_IN))
+    zlog(peer->log, LOG_DEBUG,
+         "%s rcvd UPDATE wlen %d wpfx %d attrlen %d alen %d apfx %d",
+         peer->host, withdraw_len, num_pfx_wd, attribute_len,
+         update_len, num_pfx_adv);
+
   /* NLRI is processed only when the peer is configured specific
      Address Family and Subsequent Address Family. */
   if (peer->afc[AFI_IP][SAFI_UNICAST])
diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h
index fe3917f..18b0024 100644
--- a/bgpd/bgp_packet.h
+++ b/bgpd/bgp_packet.h
@@ -26,6 +26,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, 
Boston, MA
 #define BGP_UNFEASIBLE_LEN    2U
 #define BGP_WRITE_PACKET_MAX 10U
 
+/* Size of FIFOs upon which write thread is triggered. Note that write
+ * thread is also triggered upon BGP work-queue completion.
+ */
+#define BGP_ADV_FIFO_QUANTA 500
+#define BGP_WD_FIFO_QUANTA 200
+
 /* When to refresh */
 #define REFRESH_IMMEDIATE 1
 #define REFRESH_DEFER     2 
@@ -57,4 +63,7 @@ extern int bgp_capability_receive (struct peer *, bgp_size_t);
 extern void bgp_update_restarted_peers (struct peer *);
 extern void bgp_update_implicit_eors (struct peer *);
 extern void bgp_check_update_delay (struct bgp *);
+extern int bgp_peer_wd_fifo_exists (struct peer *);
+extern int bgp_peer_adv_fifo_exists (struct peer *, int);
+extern void bgp_peer_schedule_updates(struct peer *peer);
 #endif /* _QUAGGA_BGP_PACKET_H */
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index c56e511..234caec 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -1818,6 +1818,27 @@ bgp_processq_del (struct work_queue *wq, void *data)
   XFREE (MTYPE_BGP_PROCESS_QUEUE, pq);
 }
 
+static void
+bgp_process_queue_complete (struct work_queue *wq)
+{
+  struct bgp *bgp;
+  struct peer *peer;
+  struct listnode *node, *nnode;
+
+  /* Schedule write thread either directly or through the MRAI timer
+   * if needed.
+   */
+  bgp = bgp_get_default ();
+  if (!bgp)
+    return;
+
+  if (BGP_ROUTE_ADV_HOLD(bgp))
+    return;
+
+  for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+    bgp_peer_schedule_updates(peer);
+}
+
 void
 bgp_process_queue_init (void)
 {
@@ -1834,8 +1855,11 @@ bgp_process_queue_init (void)
   
   bm->process_main_queue->spec.workfunc = &bgp_process_main;
   bm->process_main_queue->spec.del_item_data = &bgp_processq_del;
+  bm->process_main_queue->spec.completion_func = &bgp_process_queue_complete;
   bm->process_main_queue->spec.max_retries = 0;
   bm->process_main_queue->spec.hold = 50;
+  /* Use a higher yield value of 50ms for main queue processing */
+  bm->process_main_queue->spec.yield = 50 * 1000L;
   
   bm->process_rsclient_queue->spec.workfunc = &bgp_process_rsclient;
   bm->process_rsclient_queue->spec.del_item_data = &bgp_processq_del;
@@ -2862,6 +2886,12 @@ bgp_announce_route (struct peer *peer, afi_t afi, safi_t 
safi)
 
   if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
     bgp_announce_table (peer, afi, safi, NULL, 1);
+
+  /*
+   * The write thread needs to be scheduled since it may not be done as
+   * part of building adj_out.
+   */
+  bgp_peer_schedule_updates(peer);
 }
 
 void
@@ -3404,12 +3434,13 @@ bgp_nlri_parse (struct peer *peer, struct attr *attr, 
struct bgp_nlri *packet)
 /* NLRI encode syntax check routine. */
 int
 bgp_nlri_sanity_check (struct peer *peer, int afi, u_char *pnt,
-                      bgp_size_t length)
+                      bgp_size_t length, int *numpfx)
 {
   u_char *end;
   u_char prefixlen;
   int psize;
 
+  *numpfx = 0;
   end = pnt + length;
 
   /* RFC1771 6.3 The NLRI field in the UPDATE message is checked for
@@ -3447,6 +3478,7 @@ bgp_nlri_sanity_check (struct peer *peer, int afi, u_char 
*pnt,
        }
 
       pnt += psize;
+      (*numpfx)++;
     }
 
   /* Packet length consistency check. */
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index 2ed93f3..07ae02a 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -217,7 +217,7 @@ extern struct bgp_info_extra *bgp_info_extra_get (struct 
bgp_info *);
 extern void bgp_info_set_flag (struct bgp_node *, struct bgp_info *, 
u_int32_t);
 extern void bgp_info_unset_flag (struct bgp_node *, struct bgp_info *, 
u_int32_t);
 
-extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t);
+extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t, 
int *);
 extern int bgp_nlri_parse (struct peer *, struct attr *, struct bgp_nlri *);
 
 extern int bgp_maximum_prefix_overflow (struct peer *, afi_t, safi_t, int);
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 73c9a22..524e900 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -2299,6 +2299,8 @@ bgp_create (as_t *as, const char *name)
     bgp->name = strdup (name);
 
   bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX;
+  bgp->adv_quanta = BGP_ADV_FIFO_QUANTA;
+  bgp->wd_quanta = BGP_WD_FIFO_QUANTA;
 
   THREAD_TIMER_ON (bm->master, bgp->t_startup, bgp_startup_timer_expire,
                    bgp, bgp->restart_time);
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
index 4ba5131..d06e5d0 100644
--- a/bgpd/bgpd.h
+++ b/bgpd/bgpd.h
@@ -224,8 +224,13 @@ struct bgp
   } maxpaths[AFI_MAX][SAFI_MAX];
 
   u_int32_t wpkt_quanta;  /* per peer packet quanta to write */
+  u_int32_t adv_quanta;  /* adv FIFO size that triggers write */
+  u_int32_t wd_quanta;  /* withdraw FIFO size that triggers write */
 };
 
+#define BGP_ROUTE_ADV_HOLD(bgp) \
+        (bgp->main_peers_update_hold || bgp->rsclient_peers_update_hold)
+
 /* BGP peer-group support. */
 struct peer_group
 {
@@ -549,6 +554,8 @@ struct peer
   struct thread *t_gr_restart;
   struct thread *t_gr_stale;
   
+  int radv_adjusted; /* flag if MRAI has been adjusted or not */
+
   /* workqueues */
   struct work_queue *clear_node_queue;
   
-- 
1.9.1


_______________________________________________
Quagga-dev mailing list
[email protected]
https://lists.quagga.net/mailman/listinfo/quagga-dev

Reply via email to