Timo -

Sorry for the delay in looking at this.  Xmas and all that :)

netlink_route_multipath -> Does this need the 4th (family) parameter?  Why
not just derive the family from the prefix?
kernel_rtm_ipv4 & _ipv6 -> Same comment; the family parameter is never used.

kernel_route_rib adds an #ifdef HAVE_IPV6 guard.  This is not needed anymore.

Looks good otherwise.

donald




On Wed, Dec 23, 2015 at 9:47 AM, Timo Teräs <[email protected]> wrote:

> This commit updates the kernel API so that route changes are
> atomically updated using change/replaces messages instead
> of first sending a withdraw followed with update.
>
> Same for zclient updates, changes are sent as single ADD
> instead of DELETE + ADD.
>
> Signed-off-by: Timo Teräs <[email protected]>
> ---
>  zebra/kernel_null.c   |  14 +--
>  zebra/kernel_socket.c |   8 +-
>  zebra/rt.h            |   9 +-
>  zebra/rt_netlink.c    |  36 +++---
>  zebra/rt_socket.c     |  76 +++++-------
>  zebra/zebra_rib.c     | 333
> ++++++++++++++++++--------------------------------
>  6 files changed, 171 insertions(+), 305 deletions(-)
>
> diff --git a/zebra/kernel_null.c b/zebra/kernel_null.c
> index 58d2c3a..1a16a75 100644
> --- a/zebra/kernel_null.c
> +++ b/zebra/kernel_null.c
> @@ -30,19 +30,7 @@
>  #include "zebra/connected.h"
>  #include "zebra/rib.h"
>
> -int kernel_add_ipv4 (struct prefix *a, struct rib *b) { return 0; }
> -#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA
> -#pragma weak kernel_delete_ipv4 = kernel_add_ipv4
> -#else
> -int kernel_delete_ipv4 (struct prefix *a, struct rib *b) { return 0; }
> -#endif
> -
> -int kernel_add_ipv6 (struct prefix *a, struct rib *b) { return 0; }
> -#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA
> -#pragma weak kernel_delete_ipv6 = kernel_add_ipv6
> -#else
> -int kernel_delete_ipv6 (struct prefix *a, struct rib *b) { return 0; }
> -#endif
> +int kernel_route_rib (struct prefix *a, struct rib *old, struct rib *new)
> { return 0; }
>
>  int kernel_add_route (struct prefix_ipv4 *a, struct in_addr *b, int c,
> int d)
>  { return 0; }
> diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c
> index fe9e4ac..c75c05b 100644
> --- a/zebra/kernel_socket.c
> +++ b/zebra/kernel_socket.c
> @@ -859,7 +859,7 @@ rtm_read (struct rt_msghdr *rtm)
>      return;
>  #endif
>
> -  if ((rtm->rtm_type == RTM_ADD) && ! (flags & RTF_UP))
> +  if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) && !
> (flags & RTF_UP))
>      return;
>
>    /* This is connected route. */
> @@ -1070,14 +1070,14 @@ rtm_write (int message,
>
>    ifp = if_lookup_by_index (index);
>
> -  if (gate && message == RTM_ADD)
> +  if (gate && (message == RTM_ADD || message == RTM_CHANGE))
>      msg.rtm.rtm_flags |= RTF_GATEWAY;
>
>    /* When RTF_CLONING is unavailable on BSD, should we set some
>     * other flag instead?
>     */
>  #ifdef RTF_CLONING
> -  if (! gate && message == RTM_ADD && ifp &&
> +  if (! gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp &&
>        (ifp->flags & IFF_POINTOPOINT) == 0)
>      msg.rtm.rtm_flags |= RTF_CLONING;
>  #endif /* RTF_CLONING */
> @@ -1102,7 +1102,7 @@ rtm_write (int message,
>
>    if (mask)
>      msg.rtm.rtm_addrs |= RTA_NETMASK;
> -  else if (message == RTM_ADD)
> +  else if (message == RTM_ADD || message == RTM_CHANGE)
>      msg.rtm.rtm_flags |= RTF_HOST;
>
>    /* Tagging route with flags */
> diff --git a/zebra/rt.h b/zebra/rt.h
> index 7faa127..8c1c476 100644
> --- a/zebra/rt.h
> +++ b/zebra/rt.h
> @@ -27,16 +27,9 @@
>  #include "if.h"
>  #include "zebra/rib.h"
>
> -extern int kernel_add_ipv4 (struct prefix *, struct rib *);
> -extern int kernel_delete_ipv4 (struct prefix *, struct rib *);
> +extern int kernel_route_rib (struct prefix *, struct rib *, struct rib *);
>  extern int kernel_add_route (struct prefix_ipv4 *, struct in_addr *, int,
> int);
>  extern int kernel_address_add_ipv4 (struct interface *, struct connected
> *);
>  extern int kernel_address_delete_ipv4 (struct interface *, struct
> connected *);
>
> -#ifdef HAVE_IPV6
> -extern int kernel_add_ipv6 (struct prefix *, struct rib *);
> -extern int kernel_delete_ipv6 (struct prefix *, struct rib *);
> -
> -#endif /* HAVE_IPV6 */
> -
>  #endif /* _ZEBRA_RT_H */
> diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
> index 614f809..5a3576f 100644
> --- a/zebra/rt_netlink.c
> +++ b/zebra/rt_netlink.c
> @@ -1632,7 +1632,7 @@ netlink_route_multipath (int cmd, struct prefix *p,
> struct rib *rib,
>    bytelen = (family == AF_INET ? 4 : 16);
>
>    req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
> -  req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
> +  req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
>    req.n.nlmsg_type = cmd;
>    req.r.rtm_family = family;
>    req.r.rtm_table = rib->table;
> @@ -1803,30 +1803,26 @@ skip:
>  }
>
>  int
> -kernel_add_ipv4 (struct prefix *p, struct rib *rib)
> +kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
>  {
> -  return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET);
> -}
> +  int ret;
>
> -int
> -kernel_delete_ipv4 (struct prefix *p, struct rib *rib)
> -{
> -  return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET);
> -}
> +  if (!old && new)
> +    return netlink_route_multipath (RTM_NEWROUTE, p, new,
> PREFIX_FAMILY(p));
> +  if (old && !new)
> +    return netlink_route_multipath (RTM_DELROUTE, p, old,
> PREFIX_FAMILY(p));
>
> -#ifdef HAVE_IPV6
> -int
> -kernel_add_ipv6 (struct prefix *p, struct rib *rib)
> -{
> -  return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET6);
> -}
> +  /* Replace, can be done atomically if metric does not change;
> +   * netlink uses [prefix, tos, priority] to identify prefix */
> +  if (old->metric == new->metric)
> +    return netlink_route_multipath (RTM_NEWROUTE, p, new,
> PREFIX_FAMILY(p));
>
> -int
> -kernel_delete_ipv6 (struct prefix *p, struct rib *rib)
> -{
> -  return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET6);
> +  /* Add + delete so the prefix does not disappear temporarily */
> +  ret = netlink_route_multipath (RTM_NEWROUTE, p, new, PREFIX_FAMILY(p));
> +  if (netlink_route_multipath (RTM_DELROUTE, p, old, PREFIX_FAMILY(p)) <
> 0)
> +    ret = -1;
> +  return ret;
>  }
> -#endif /* HAVE_IPV6 */
>
>  /* Interface address modification. */
>  static int
> diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c
> index a7ef457..513c320 100644
> --- a/zebra/rt_socket.c
> +++ b/zebra/rt_socket.c
> @@ -216,34 +216,6 @@ kernel_rtm_ipv4 (int cmd, struct prefix *p, struct
> rib *rib, int family)
>    return 0; /*XXX*/
>  }
>
> -int
> -kernel_add_ipv4 (struct prefix *p, struct rib *rib)
> -{
> -  int route;
> -
> -  if (zserv_privs.change(ZPRIVS_RAISE))
> -    zlog (NULL, LOG_ERR, "Can't raise privileges");
> -  route = kernel_rtm_ipv4 (RTM_ADD, p, rib, AF_INET);
> -  if (zserv_privs.change(ZPRIVS_LOWER))
> -    zlog (NULL, LOG_ERR, "Can't lower privileges");
> -
> -  return route;
> -}
> -
> -int
> -kernel_delete_ipv4 (struct prefix *p, struct rib *rib)
> -{
> -  int route;
> -
> -  if (zserv_privs.change(ZPRIVS_RAISE))
> -    zlog (NULL, LOG_ERR, "Can't raise privileges");
> -  route = kernel_rtm_ipv4 (RTM_DELETE, p, rib, AF_INET);
> -  if (zserv_privs.change(ZPRIVS_LOWER))
> -    zlog (NULL, LOG_ERR, "Can't lower privileges");
> -
> -  return route;
> -}
> -
>  #ifdef HAVE_IPV6
>
>  #ifdef SIN6_LEN
> @@ -273,8 +245,7 @@ sin6_masklen (struct in6_addr mask)
>
>  /* Interface between zebra message and rtm message. */
>  static int
> -kernel_rtm_ipv6_multipath (int cmd, struct prefix *p, struct rib *rib,
> -                          int family)
> +kernel_rtm_ipv6 (int cmd, struct prefix *p, struct rib *rib, int family)
>  {
>    struct sockaddr_in6 *mask;
>    struct sockaddr_in6 sin_dest, sin_mask, sin_gate;
> @@ -369,7 +340,7 @@ kernel_rtm_ipv6_multipath (int cmd, struct prefix *p,
> struct rib *rib,
>  #if 0
>        if (error)
>         {
> -         zlog_info ("kernel_rtm_ipv6_multipath(): nexthop %d add
> error=%d.",
> +         zlog_info ("kernel_rtm_ipv6(): nexthop %d add error=%d.",
>             nexthop_num, error);
>         }
>  #else
> @@ -383,38 +354,47 @@ kernel_rtm_ipv6_multipath (int cmd, struct prefix
> *p, struct rib *rib,
>    if (nexthop_num == 0)
>      {
>        if (IS_ZEBRA_DEBUG_KERNEL)
> -       zlog_debug ("kernel_rtm_ipv6_multipath(): No useful nexthop.");
> +       zlog_debug ("kernel_rtm_ipv6(): No useful nexthop.");
>        return 0;
>      }
>
>    return 0; /*XXX*/
>  }
>
> +#endif
> +
>  int
> -kernel_add_ipv6 (struct prefix *p, struct rib *rib)
> +kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
>  {
> -  int route;
> +  struct rib *rib;
> +  int route = 0, cmd;
>
> -  if (zserv_privs.change(ZPRIVS_RAISE))
> -    zlog (NULL, LOG_ERR, "Can't raise privileges");
> -  route =  kernel_rtm_ipv6_multipath (RTM_ADD, p, rib, AF_INET6);
> -  if (zserv_privs.change(ZPRIVS_LOWER))
> -    zlog (NULL, LOG_ERR, "Can't lower privileges");
> +  if (!old && new)
> +    cmd = RTM_ADD;
> +  else if (old && !new)
> +    cmd = RTM_DELETE;
> +  else
> +    cmd = RTM_CHANGE;
>
> -  return route;
> -}
> -
> -int
> -kernel_delete_ipv6 (struct prefix *p, struct rib *rib)
> -{
> -  int route;
> +  rib = new ? new : old;
>
>    if (zserv_privs.change(ZPRIVS_RAISE))
>      zlog (NULL, LOG_ERR, "Can't raise privileges");
> -  route =  kernel_rtm_ipv6_multipath (RTM_DELETE, p, rib, AF_INET6);
> +
> +  switch (PREFIX_FAMILY(p))
> +    {
> +    case AF_INET:
> +      route = kernel_rtm_ipv4 (cmd, p, rib, AF_INET);
> +      break;
> +#ifdef HAVE_IPV6
> +    case AF_INET6:
> +      route = kernel_rtm_ipv6 (cmd, p, rib, AF_INET6);
> +      break;
> +#endif
> +    }
> +
>    if (zserv_privs.change(ZPRIVS_LOWER))
>      zlog (NULL, LOG_ERR, "Can't lower privileges");
>
>    return route;
>  }
> -#endif /* HAVE_IPV6 */
> diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
> index 38357ff..7cce13f 100644
> --- a/zebra/zebra_rib.c
> +++ b/zebra/zebra_rib.c
> @@ -1103,49 +1103,8 @@ nexthop_active_update (struct route_node *rn,
> struct rib *rib, int set)
>
>
>
> -static void
> -rib_install_kernel (struct route_node *rn, struct rib *rib)
> -{
> -  int ret = 0;
> -  struct nexthop *nexthop, *tnexthop;
> -  rib_table_info_t *info = rn->table->info;
> -  int recursing;
> -
> -  if (info->safi != SAFI_UNICAST)
> -    {
> -      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
> -        SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> -      return;
> -    }
> -
> -  /*
> -   * Make sure we update the FPM any time we send new information to
> -   * the kernel.
> -   */
> -  zfpm_trigger_update (rn, "installing in kernel");
> -  switch (PREFIX_FAMILY (&rn->p))
> -    {
> -    case AF_INET:
> -      ret = kernel_add_ipv4 (&rn->p, rib);
> -      break;
> -#ifdef HAVE_IPV6
> -    case AF_INET6:
> -      ret = kernel_add_ipv6 (&rn->p, rib);
> -      break;
> -#endif /* HAVE_IPV6 */
> -    }
> -
> -  /* This condition is never met, if we are using rt_socket.c */
> -  if (ret < 0)
> -    {
> -      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
> -       UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> -    }
> -}
> -
> -/* Uninstall the route from kernel. */
>  static int
> -rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
> +rib_update_kernel (struct route_node *rn, struct rib *old, struct rib
> *new)
>  {
>    int ret = 0;
>    struct nexthop *nexthop, *tnexthop;
> @@ -1154,31 +1113,31 @@ rib_uninstall_kernel (struct route_node *rn,
> struct rib *rib)
>
>    if (info->safi != SAFI_UNICAST)
>      {
> -      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
> -        SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> -      return ret;
> +      if (new)
> +        for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
> +          SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> +      if (old)
> +        for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
> +          UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> +      return 0;
>      }
>
>    /*
>     * Make sure we update the FPM any time we send new information to
>     * the kernel.
>     */
> -  zfpm_trigger_update (rn, "uninstalling from kernel");
> +  zfpm_trigger_update (rn, "updating in kernel");
>
> -  switch (PREFIX_FAMILY (&rn->p))
> -    {
> -    case AF_INET:
> -      ret = kernel_delete_ipv4 (&rn->p, rib);
> -      break;
> -#ifdef HAVE_IPV6
> -    case AF_INET6:
> -      ret = kernel_delete_ipv6 (&rn->p, rib);
> -      break;
> -#endif /* HAVE_IPV6 */
> -    }
> +  ret = kernel_route_rib (&rn->p, old, new);
>
> -  for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
> -    UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> +  /* This condition is never met, if we are using rt_socket.c */
> +  if (ret < 0 && new)
> +      for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
> +        UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
> +
> +  if (old)
> +    for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
> +      UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
>
>    return ret;
>  }
> @@ -1196,7 +1155,7 @@ rib_uninstall (struct route_node *rn, struct rib
> *rib)
>
>        redistribute_delete (&rn->p, rib);
>        if (! RIB_SYSTEM_ROUTE (rib))
> -       rib_uninstall_kernel (rn, rib);
> +       rib_update_kernel (rn, rib, NULL);
>        UNSET_FLAG (rib->flags, ZEBRA_FLAG_SELECTED);
>      }
>  }
> @@ -1261,15 +1220,56 @@ rib_gc_dest (struct route_node *rn)
>    return 1;
>  }
>
> +/* Check if 'alternate' RIB entry is better than 'current'. */
> +static struct rib *
> +rib_choose_best (struct rib *current, struct rib *alternate)
> +{
> +  if (current == NULL)
> +    return alternate;
> +
> +  /* filter route selection in following order:
> +   * - connected beats other types
> +   * - lower distance beats higher
> +   * - lower metric beats higher for equal distance
> +   * - last, hence oldest, route wins tie break.
> +   */
> +
> +  /* Connected routes. Pick the last connected
> +   * route of the set of lowest metric connected routes.
> +   */
> +  if (alternate->type == ZEBRA_ROUTE_CONNECT)
> +    {
> +      if (current->type != ZEBRA_ROUTE_CONNECT
> +          || alternate->metric <= current->metric)
> +        return alternate;
> +
> +      return current;
> +    }
> +
> +  if (current->type == ZEBRA_ROUTE_CONNECT)
> +    return current;
> +
> +  /* higher distance loses */
> +  if (alternate->distance < current->distance)
> +    return alternate;
> +  if (current->distance < alternate->distance)
> +    return current;
> +
> +  /* metric tie-breaks equal distance */
> +  if (alternate->metric <= current->metric)
> +    return alternate;
> +
> +  return current;
> +}
> +
>  /* Core function for processing routing information base. */
>  static void
>  rib_process (struct route_node *rn)
>  {
>    struct rib *rib;
>    struct rib *next;
> -  struct rib *fib = NULL;
> -  struct rib *select = NULL;
> -  struct rib *del = NULL;
> +  struct rib *old_fib = NULL;
> +  struct rib *new_fib = NULL;
>    int installed = 0;
>    struct nexthop *nexthop = NULL, *tnexthop;
>    int recursing;
> @@ -1279,32 +1279,18 @@ rib_process (struct route_node *rn)
>
>    info = rn->table->info;
>
> -  RNODE_FOREACH_RIB_SAFE (rn, rib, next)
> +  RNODE_FOREACH_RIB (rn, rib)
>      {
>        /* Currently installed rib. */
>        if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
>          {
> -          assert (fib == NULL);
> -          fib = rib;
> +          assert (old_fib == NULL);
> +          old_fib = rib;
>          }
> -
> -      /* Unlock removed routes, so they'll be freed, bar the FIB entry,
> -       * which we need to do do further work with below.
> -       */
> +
> +      /* Skip deleted entries from selection */
>        if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
> -        {
> -          if (rib != fib)
> -            {
> -              if (IS_ZEBRA_DEBUG_RIB)
> -                rnode_debug (rn, "rn %p, removing rib %p",
> -                            (void *)rn, (void *)rib);
> -              rib_unlink (rn, rib);
> -            }
> -          else
> -            del = rib;
> -
> -          continue;
> -        }
> +        continue;
>
>        /* Skip unreachable nexthop. */
>        if (! nexthop_active_update (rn, rib, 0))
> @@ -1314,150 +1300,73 @@ rib_process (struct route_node *rn)
>        if (rib->distance == DISTANCE_INFINITY)
>          continue;
>
> -      /* Newly selected rib, the common case. */
> -      if (!select)
> -        {
> -          select = rib;
> -          continue;
> -        }
> -
> -      /* filter route selection in following order:
> -       * - connected beats other types
> -       * - lower distance beats higher
> -       * - lower metric beats higher for equal distance
> -       * - last, hence oldest, route wins tie break.
> -       */
> -
> -      /* Connected routes. Pick the last connected
> -       * route of the set of lowest metric connected routes.
> -       */
> -      if (rib->type == ZEBRA_ROUTE_CONNECT)
> -        {
> -          if (select->type != ZEBRA_ROUTE_CONNECT
> -              || rib->metric <= select->metric)
> -            select = rib;
> -          continue;
> -        }
> -      else if (select->type == ZEBRA_ROUTE_CONNECT)
> -        continue;
> -
> -      /* higher distance loses */
> -      if (rib->distance > select->distance)
> -        continue;
> -
> -      /* lower wins */
> -      if (rib->distance < select->distance)
> -        {
> -          select = rib;
> -          continue;
> -        }
> -
> -      /* metric tie-breaks equal distance */
> -      if (rib->metric <= select->metric)
> -        select = rib;
> +      new_fib = rib_choose_best(new_fib, rib);
>      } /* RNODE_FOREACH_RIB_SAFE */
>
>    /* After the cycle is finished, the following pointers will be set:
> -   * select --- the winner RIB entry, if any was found, otherwise NULL
> -   * fib    --- the SELECTED RIB entry, if any, otherwise NULL
> -   * del    --- equal to fib, if fib is queued for deletion, NULL
> otherwise
> -   * rib    --- NULL
> +   * old_fib --- RIB entry currently having SELECTED
> +   * new_fib --- RIB entry that is newly SELECTED
>     */
>
> -  /* Same RIB entry is selected. Update FIB and finish. */
> -  if (select && select == fib)
> -    {
> -      if (IS_ZEBRA_DEBUG_RIB)
> -       rnode_debug (rn, "Updating existing route, select %p, fib %p",
> -                     (void *)select, (void *)fib);
> -      if (CHECK_FLAG (select->status, RIB_ENTRY_CHANGED))
> -        {
> -          if (info->safi == SAFI_UNICAST)
> -           zfpm_trigger_update (rn, "updating existing route");
> -
> -          redistribute_delete (&rn->p, select);
> -          if (! RIB_SYSTEM_ROUTE (select))
> -            rib_uninstall_kernel (rn, select);
> -
> -          /* Set real nexthop. */
> -          nexthop_active_update (rn, select, 1);
> -
> -          if (! RIB_SYSTEM_ROUTE (select))
> -            rib_install_kernel (rn, select);
> -          redistribute_add (&rn->p, select);
> -        }
> -      else if (! RIB_SYSTEM_ROUTE (select))
> -        {
> -          /* Housekeeping code to deal with
> -             race conditions in kernel with linux
> -             netlink reporting interface up before IPv4 or IPv6 protocol
> -             is ready to add routes.
> -             This makes sure the routes are IN the kernel.
> -           */
> -
> -          for (ALL_NEXTHOPS_RO(select->nexthop, nexthop, tnexthop,
> recursing))
> -            if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
> -            {
> -              installed = 1;
> -              break;
> -            }
> -          if (! installed)
> -            rib_install_kernel (rn, select);
> -        }
> -      goto end;
> -    }
> +  /* Set real nexthops. */
> +  if (new_fib)
> +    nexthop_active_update (rn, new_fib, 1);
>
> -  /* At this point we either haven't found the best RIB entry or it is
> -   * different from what we currently intend to flag with SELECTED. In
> both
> -   * cases, if a RIB block is present in FIB, it should be withdrawn.
> -   */
> -  if (fib)
> +  /* Update kernel if FIB entry has changed */
> +  if (old_fib != new_fib
> +      || (new_fib && CHECK_FLAG (new_fib->status, RIB_ENTRY_CHANGED)))
>      {
> -      if (IS_ZEBRA_DEBUG_RIB)
> -        rnode_debug (rn, "Removing existing route, fib %p", (void *)fib);
> +        if (old_fib && old_fib != new_fib)
> +          {
> +            if (! new_fib)
> +              redistribute_delete (&rn->p, old_fib);
>
> -      if (info->safi == SAFI_UNICAST)
> -        zfpm_trigger_update (rn, "removing existing route");
> +            if (! RIB_SYSTEM_ROUTE (old_fib) && (! new_fib ||
> RIB_SYSTEM_ROUTE (new_fib)))
> +              rib_update_kernel (rn, old_fib, NULL);
> +            UNSET_FLAG (old_fib->flags, ZEBRA_FLAG_SELECTED);
> +          }
>
> -      redistribute_delete (&rn->p, fib);
> -      if (! RIB_SYSTEM_ROUTE (fib))
> -       rib_uninstall_kernel (rn, fib);
> -      UNSET_FLAG (fib->flags, ZEBRA_FLAG_SELECTED);
> +        if (new_fib)
> +          {
> +            /* Install new or replace existing FIB entry */
> +            SET_FLAG (new_fib->flags, ZEBRA_FLAG_SELECTED);
> +            redistribute_add (&rn->p, new_fib);
>
> -      /* Set real nexthop. */
> -      nexthop_active_update (rn, fib, 1);
> -    }
> +            if (! RIB_SYSTEM_ROUTE (new_fib))
> +              rib_update_kernel (rn, old_fib, new_fib);
> +          }
>
> -  /* Regardless of some RIB entry being SELECTED or not before, now we can
> -   * tell, that if a new winner exists, FIB is still not updated with this
> -   * data, but ready to be.
> -   */
> -  if (select)
> +        if (info->safi == SAFI_UNICAST)
> +          zfpm_trigger_update (rn, "updating existing route");
> +    }
> +  else if (old_fib == new_fib && new_fib && ! RIB_SYSTEM_ROUTE (new_fib))
>      {
> -      if (IS_ZEBRA_DEBUG_RIB)
> -        rnode_debug (rn, "Adding route, select %p", (void *)select);
> -
> -      if (info->safi == SAFI_UNICAST)
> -        zfpm_trigger_update (rn, "new route selected");
> -
> -      /* Set real nexthop. */
> -      nexthop_active_update (rn, select, 1);
> -
> -      if (! RIB_SYSTEM_ROUTE (select))
> -        rib_install_kernel (rn, select);
> -      SET_FLAG (select->flags, ZEBRA_FLAG_SELECTED);
> -      redistribute_add (&rn->p, select);
> +      /* Housekeeping code to deal with race conditions in kernel with
> +       * linux netlink reporting interface up before IPv4 or IPv6 protocol
> +       * is ready to add routes. This makes sure routes are IN the kernel.
> +       */
> +      for (ALL_NEXTHOPS_RO(new_fib->nexthop, nexthop, tnexthop,
> recursing))
> +        if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
> +          {
> +            installed = 1;
> +            break;
> +          }
> +      if (! installed)
> +        rib_update_kernel (rn, NULL, new_fib);
>      }
>
> -  /* FIB route was removed, should be deleted */
> -  if (del)
> +  /* Remove all RIB entries queued for removal */
> +  RNODE_FOREACH_RIB_SAFE (rn, rib, next)
>      {
> -      if (IS_ZEBRA_DEBUG_RIB)
> -        rnode_debug (rn, "Deleting fib %p, rn %p", (void *)del, (void
> *)rn);
> -      rib_unlink (rn, del);
> +      if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
> +        {
> +          if (IS_ZEBRA_DEBUG_RIB)
> +            rnode_debug (rn, "rn %p, removing rib %p",
> +                        (void *)rn, (void *)rib);
> +          rib_unlink (rn, rib);
> +        }
>      }
>
> -end:
>    if (IS_ZEBRA_DEBUG_RIB_Q)
>      rnode_debug (rn, "rn %p dequeued", (void *)rn);
>
> @@ -3082,7 +2991,7 @@ rib_sweep_table (struct route_table *table)
>           if (rib->type == ZEBRA_ROUTE_KERNEL &&
>               CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE))
>             {
> -             ret = rib_uninstall_kernel (rn, rib);
> +             ret = rib_update_kernel (rn, rib, NULL);
>               if (! ret)
>                  rib_delnode (rn, rib);
>             }
> @@ -3165,7 +3074,7 @@ rib_close_table (struct route_table *table)
>              zfpm_trigger_update (rn, NULL);
>
>           if (! RIB_SYSTEM_ROUTE (rib))
> -           rib_uninstall_kernel (rn, rib);
> +           rib_update_kernel (rn, rib, NULL);
>          }
>  }
>
> --
> 2.6.4
>
>
> _______________________________________________
> Quagga-dev mailing list
> [email protected]
> https://lists.quagga.net/mailman/listinfo/quagga-dev
>
_______________________________________________
Quagga-dev mailing list
[email protected]
https://lists.quagga.net/mailman/listinfo/quagga-dev

Reply via email to