This commit updates the kernel API so that route changes are applied atomically using CHANGE notification messages, instead of first sending a withdraw followed by an update. The same applies to zclient updates: a change is sent as a single ADD instead of a DELETE followed by an ADD.
Signed-off-by: Timo Teräs <[email protected]> --- zebra/kernel_null.c | 14 +-- zebra/kernel_socket.c | 8 +- zebra/rt.h | 14 +-- zebra/rt_netlink.c | 33 ++--- zebra/rt_socket.c | 74 +++++------ zebra/zebra_rib.c | 340 +++++++++++++++++++------------------------------- 6 files changed, 182 insertions(+), 301 deletions(-) diff --git a/zebra/kernel_null.c b/zebra/kernel_null.c index 4cd43db..e188a90 100644 --- a/zebra/kernel_null.c +++ b/zebra/kernel_null.c @@ -29,19 +29,7 @@ #include "zebra/redistribute.h" #include "zebra/connected.h" -int kernel_add_ipv4 (struct prefix *a, struct rib *b) { return 0; } -#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA -#pragma weak kernel_delete_ipv4 = kernel_add_ipv4 -#else -int kernel_delete_ipv4 (struct prefix *a, struct rib *b) { return 0; } -#endif - -int kernel_add_ipv6 (struct prefix *a, struct rib *b) { return 0; } -#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA -#pragma weak kernel_delete_ipv6 = kernel_add_ipv6 -#else -int kernel_delete_ipv6 (struct prefix *a, struct rib *b) { return 0; } -#endif +int kernel_route_rib (struct prefix *a, int cmd, struct rib *rib) { return 0; } int kernel_add_route (struct prefix_ipv4 *a, struct in_addr *b, int c, int d) { return 0; } diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 916dad9..f20a6b7 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -867,7 +867,7 @@ rtm_read (struct rt_msghdr *rtm) return; #endif - if ((rtm->rtm_type == RTM_ADD) && ! (flags & RTF_UP)) + if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) && ! (flags & RTF_UP)) return; /* This is connected route. */ @@ -1077,14 +1077,14 @@ rtm_write (int message, ifp = if_lookup_by_index (index); - if (gate && message == RTM_ADD) + if (gate && (message == RTM_ADD || message == RTM_CHANGE)) msg.rtm.rtm_flags |= RTF_GATEWAY; /* When RTF_CLONING is unavailable on BSD, should we set some * other flag instead? */ #ifdef RTF_CLONING - if (! gate && message == RTM_ADD && ifp && + if (! 
gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp && (ifp->flags & IFF_POINTOPOINT) == 0) msg.rtm.rtm_flags |= RTF_CLONING; #endif /* RTF_CLONING */ @@ -1109,7 +1109,7 @@ rtm_write (int message, if (mask) msg.rtm.rtm_addrs |= RTA_NETMASK; - else if (message == RTM_ADD) + else if (message == RTM_ADD || message == RTM_CHANGE) msg.rtm.rtm_flags |= RTF_HOST; /* Tagging route with flags */ diff --git a/zebra/rt.h b/zebra/rt.h index 7faa127..bb55bb3 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -27,16 +27,14 @@ #include "if.h" #include "zebra/rib.h" -extern int kernel_add_ipv4 (struct prefix *, struct rib *); -extern int kernel_delete_ipv4 (struct prefix *, struct rib *); +#define KERNEL_ROUTE_ADD 1 +#define KERNEL_ROUTE_DEL 2 +#define KERNEL_ROUTE_CHANGE_DEL 3 +#define KERNEL_ROUTE_CHANGE 4 + +extern int kernel_route_rib (struct prefix *, int, struct rib *); extern int kernel_add_route (struct prefix_ipv4 *, struct in_addr *, int, int); extern int kernel_address_add_ipv4 (struct interface *, struct connected *); extern int kernel_address_delete_ipv4 (struct interface *, struct connected *); -#ifdef HAVE_IPV6 -extern int kernel_add_ipv6 (struct prefix *, struct rib *); -extern int kernel_delete_ipv6 (struct prefix *, struct rib *); - -#endif /* HAVE_IPV6 */ - #endif /* _ZEBRA_RT_H */ diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index 7e41072..2854029 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -1601,7 +1601,7 @@ netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib, bytelen = (family == AF_INET ? 
4 : 16); req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg)); - req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST; + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; req.n.nlmsg_type = cmd; req.r.rtm_family = family; req.r.rtm_table = rib->table; @@ -1758,30 +1758,19 @@ skip: } int -kernel_add_ipv4 (struct prefix *p, struct rib *rib) +kernel_route_rib (struct prefix *p, int cmd, struct rib *rib) { - return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET); -} - -int -kernel_delete_ipv4 (struct prefix *p, struct rib *rib) -{ - return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET); -} - -#ifdef HAVE_IPV6 -int -kernel_add_ipv6 (struct prefix *p, struct rib *rib) -{ - return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET6); -} + switch (cmd) + { + case KERNEL_ROUTE_ADD: + case KERNEL_ROUTE_CHANGE: + return netlink_route_multipath (RTM_NEWROUTE, p, rib, PREFIX_FAMILY(p)); + case KERNEL_ROUTE_DEL: + return netlink_route_multipath (RTM_DELROUTE, p, rib, PREFIX_FAMILY(p)); + } -int -kernel_delete_ipv6 (struct prefix *p, struct rib *rib) -{ - return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET6); + return 0; } -#endif /* HAVE_IPV6 */ /* Interface address modification. 
*/ static int diff --git a/zebra/rt_socket.c b/zebra/rt_socket.c index d8c947c..2fe2018 100644 --- a/zebra/rt_socket.c +++ b/zebra/rt_socket.c @@ -216,34 +216,6 @@ kernel_rtm_ipv4 (int cmd, struct prefix *p, struct rib *rib, int family) return 0; /*XXX*/ } -int -kernel_add_ipv4 (struct prefix *p, struct rib *rib) -{ - int route; - - if (zserv_privs.change(ZPRIVS_RAISE)) - zlog (NULL, LOG_ERR, "Can't raise privileges"); - route = kernel_rtm_ipv4 (RTM_ADD, p, rib, AF_INET); - if (zserv_privs.change(ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - return route; -} - -int -kernel_delete_ipv4 (struct prefix *p, struct rib *rib) -{ - int route; - - if (zserv_privs.change(ZPRIVS_RAISE)) - zlog (NULL, LOG_ERR, "Can't raise privileges"); - route = kernel_rtm_ipv4 (RTM_DELETE, p, rib, AF_INET); - if (zserv_privs.change(ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - return route; -} - #ifdef HAVE_IPV6 /* Calculate sin6_len value for netmask socket value. */ @@ -386,31 +358,45 @@ kernel_rtm_ipv6_multipath (int cmd, struct prefix *p, struct rib *rib, return 0; /*XXX*/ } +#endif + int -kernel_add_ipv6 (struct prefix *p, struct rib *rib) +kernel_route_rib (struct prefix *p, int cmd, struct rib *rib) { - int route; + int route = 0, rtcmd; + + switch (cmd) + { + case KERNEL_ROUTE_ADD: + rtcmd = RTM_ADD; + break; + case KERNEL_ROUTE_CHANGE: + rtcmd = RTM_CHANGE; + break; + case KERNEL_ROUTE_DEL: + rtcmd = RTM_DEL; + break; + default: + return; + } if (zserv_privs.change(ZPRIVS_RAISE)) zlog (NULL, LOG_ERR, "Can't raise privileges"); - route = kernel_rtm_ipv6_multipath (RTM_ADD, p, rib, AF_INET6); - if (zserv_privs.change(ZPRIVS_LOWER)) - zlog (NULL, LOG_ERR, "Can't lower privileges"); - - return route; -} -int -kernel_delete_ipv6 (struct prefix *p, struct rib *rib) -{ - int route; + switch (PREFIX_FAMILY(p)) + { + case AF_INET: + route = kernel_rtm_ipv4 (rtcmd, p, rib, AF_INET); + break; +#ifdef HAVE_IPV6 + case AF_INET6: + route = 
kernel_rtm_ipv6 (rtcmd, p, rib, AF_INET6); + break; +#endif + } - if (zserv_privs.change(ZPRIVS_RAISE)) - zlog (NULL, LOG_ERR, "Can't raise privileges"); - route = kernel_rtm_ipv6_multipath (RTM_DELETE, p, rib, AF_INET6); if (zserv_privs.change(ZPRIVS_LOWER)) zlog (NULL, LOG_ERR, "Can't lower privileges"); return route; } -#endif /* HAVE_IPV6 */ diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 5c8322d..241cfec 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -1201,8 +1201,8 @@ nexthop_active_update (struct route_node *rn, struct rib *rib, int set) -static void -rib_install_kernel (struct route_node *rn, struct rib *rib) +static int +rib_update_kernel (struct route_node *rn, int cmd, struct rib *rib) { int ret = 0; struct nexthop *nexthop, *tnexthop; @@ -1211,72 +1211,36 @@ rib_install_kernel (struct route_node *rn, struct rib *rib) if (info->safi != SAFI_UNICAST) { - for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) - SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); - return; + switch (cmd) + { + case KERNEL_ROUTE_ADD: + case KERNEL_ROUTE_CHANGE: + for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) + SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); + break; + case KERNEL_ROUTE_CHANGE_DEL: + case KERNEL_ROUTE_DEL: + for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) + UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); + break; + } + return 0; } /* * Make sure we update the FPM any time we send new information to * the kernel. 
*/ - zfpm_trigger_update (rn, "installing in kernel"); - switch (PREFIX_FAMILY (&rn->p)) - { - case AF_INET: - ret = kernel_add_ipv4 (&rn->p, rib); - break; -#ifdef HAVE_IPV6 - case AF_INET6: - ret = kernel_add_ipv6 (&rn->p, rib); - break; -#endif /* HAVE_IPV6 */ - } + zfpm_trigger_update (rn, "updating in kernel"); + + ret = kernel_route_rib (&rn->p, cmd, rib); /* This condition is never met, if we are using rt_socket.c */ - if (ret < 0) + if (ret < 0 || cmd == KERNEL_ROUTE_DEL || cmd == KERNEL_ROUTE_CHANGE_DEL) { for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); } -} - -/* Uninstall the route from kernel. */ -static int -rib_uninstall_kernel (struct route_node *rn, struct rib *rib) -{ - int ret = 0; - struct nexthop *nexthop, *tnexthop; - rib_table_info_t *info = rn->table->info; - int recursing; - - if (info->safi != SAFI_UNICAST) - { - for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) - SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); - return ret; - } - - /* - * Make sure we update the FPM any time we send new information to - * the kernel. - */ - zfpm_trigger_update (rn, "uninstalling from kernel"); - - switch (PREFIX_FAMILY (&rn->p)) - { - case AF_INET: - ret = kernel_delete_ipv4 (&rn->p, rib); - break; -#ifdef HAVE_IPV6 - case AF_INET6: - ret = kernel_delete_ipv6 (&rn->p, rib); - break; -#endif /* HAVE_IPV6 */ - } - - for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing)) - UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB); return ret; } @@ -1294,7 +1258,7 @@ rib_uninstall (struct route_node *rn, struct rib *rib) redistribute_delete (&rn->p, rib); if (! RIB_SYSTEM_ROUTE (rib)) - rib_uninstall_kernel (rn, rib); + rib_update_kernel (rn, KERNEL_ROUTE_DEL, rib); UNSET_FLAG (rib->flags, ZEBRA_FLAG_SELECTED); } } @@ -1359,15 +1323,56 @@ rib_gc_dest (struct route_node *rn) return 1; } +/* Check if 'alternate' RIB entry is better than 'current'. 
*/ +static struct rib * +rib_choose_best (struct rib *current, struct rib *alternate) +{ + if (current == NULL) + return alternate; + + /* filter route selection in following order: + * - connected beats other types + * - lower distance beats higher + * - lower metric beats higher for equal distance + * - last, hence oldest, route wins tie break. + */ + + /* Connected routes. Pick the last connected + * route of the set of lowest metric connected routes. + */ + if (alternate->type == ZEBRA_ROUTE_CONNECT) + { + if (current->type != ZEBRA_ROUTE_CONNECT + || alternate->metric <= current->metric) + return alternate; + + return current; + } + + if (current->type == ZEBRA_ROUTE_CONNECT) + return current; + + /* higher distance loses */ + if (alternate->distance > current->distance) + return alternate; + if (current->distance < alternate->distance) + return current; + + /* metric tie-breaks equal distance */ + if (alternate->metric <= current->metric) + return alternate; + + return current; +} + /* Core function for processing routing information base. */ static void rib_process (struct route_node *rn) { struct rib *rib; struct rib *next; - struct rib *fib = NULL; - struct rib *select = NULL; - struct rib *del = NULL; + struct rib *old_selected = NULL; + struct rib *new_selected = NULL; int installed = 0; struct nexthop *nexthop = NULL, *tnexthop; int recursing; @@ -1377,32 +1382,18 @@ rib_process (struct route_node *rn) info = rn->table->info; - RNODE_FOREACH_RIB_SAFE (rn, rib, next) + RNODE_FOREACH_RIB (rn, rib) { /* Currently installed rib. */ if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) { - assert (fib == NULL); - fib = rib; + assert (old_selected == NULL); + old_selected = rib; } - - /* Unlock removed routes, so they'll be freed, bar the FIB entry, - * which we need to do do further work with below. 
- */ + + /* Skip deleted entries from selection */ if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) - { - if (rib != fib) - { - if (IS_ZEBRA_DEBUG_RIB) - rnode_debug (rn, "rn %p, removing rib %p", - (void *)rn, (void *)rib); - rib_unlink (rn, rib); - } - else - del = rib; - - continue; - } + continue; /* Skip unreachable nexthop. */ if (! nexthop_active_update (rn, rib, 0)) @@ -1412,150 +1403,79 @@ rib_process (struct route_node *rn) if (rib->distance == DISTANCE_INFINITY) continue; - /* Newly selected rib, the common case. */ - if (!select) - { - select = rib; - continue; - } - - /* filter route selection in following order: - * - connected beats other types - * - lower distance beats higher - * - lower metric beats higher for equal distance - * - last, hence oldest, route wins tie break. - */ - - /* Connected routes. Pick the last connected - * route of the set of lowest metric connected routes. - */ - if (rib->type == ZEBRA_ROUTE_CONNECT) - { - if (select->type != ZEBRA_ROUTE_CONNECT - || rib->metric <= select->metric) - select = rib; - continue; - } - else if (select->type == ZEBRA_ROUTE_CONNECT) - continue; - - /* higher distance loses */ - if (rib->distance > select->distance) - continue; - - /* lower wins */ - if (rib->distance < select->distance) - { - select = rib; - continue; - } - - /* metric tie-breaks equal distance */ - if (rib->metric <= select->metric) - select = rib; + new_selected = rib_choose_best(new_selected, rib); } /* RNODE_FOREACH_RIB_SAFE */ /* After the cycle is finished, the following pointers will be set: - * select --- the winner RIB entry, if any was found, otherwise NULL - * fib --- the SELECTED RIB entry, if any, otherwise NULL - * del --- equal to fib, if fib is queued for deletion, NULL otherwise - * rib --- NULL + * old_selected --- RIB entry currently having SELECTED + * new_selected --- RIB entry that is newly SELECTED */ - /* Same RIB entry is selected. Update FIB and finish. 
*/ - if (select && select == fib) - { - if (IS_ZEBRA_DEBUG_RIB) - rnode_debug (rn, "Updating existing route, select %p, fib %p", - (void *)select, (void *)fib); - if (CHECK_FLAG (select->status, RIB_ENTRY_CHANGED)) - { - if (info->safi == SAFI_UNICAST) - zfpm_trigger_update (rn, "updating existing route"); - - redistribute_delete (&rn->p, select); - if (! RIB_SYSTEM_ROUTE (select)) - rib_uninstall_kernel (rn, select); + /* Set real nexthops. */ + if (new_selected) + nexthop_active_update (rn, new_selected, 1); - /* Set real nexthop. */ - nexthop_active_update (rn, select, 1); - - if (! RIB_SYSTEM_ROUTE (select)) - rib_install_kernel (rn, select); - redistribute_add (&rn->p, select); - } - else if (! RIB_SYSTEM_ROUTE (select)) - { - /* Housekeeping code to deal with - race conditions in kernel with linux - netlink reporting interface up before IPv4 or IPv6 protocol - is ready to add routes. - This makes sure the routes are IN the kernel. - */ - - for (ALL_NEXTHOPS_RO(select->nexthop, nexthop, tnexthop, recursing)) - if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) - { - installed = 1; - break; - } - if (! installed) - rib_install_kernel (rn, select); - } - goto end; - } - - /* At this point we either haven't found the best RIB entry or it is - * different from what we currently intend to flag with SELECTED. In both - * cases, if a RIB block is present in FIB, it should be withdrawn. - */ - if (fib) + /* Update kernel if FIB entry has changed */ + if (old_selected != new_selected + || (new_selected && CHECK_FLAG (new_selected->status, RIB_ENTRY_CHANGED))) { - if (IS_ZEBRA_DEBUG_RIB) - rnode_debug (rn, "Removing existing route, fib %p", (void *)fib); - - if (info->safi == SAFI_UNICAST) - zfpm_trigger_update (rn, "removing existing route"); + if (old_selected && old_selected != new_selected) + { + if (! new_selected) + redistribute_delete (&rn->p, old_selected); + + if (! RIB_SYSTEM_ROUTE (old_selected)) + rib_update_kernel (rn, + new_selected && ! 
RIB_SYSTEM_ROUTE (new_selected) + ? KERNEL_ROUTE_CHANGE_DEL : KERNEL_ROUTE_DEL, + old_selected); + UNSET_FLAG (old_selected->flags, ZEBRA_FLAG_SELECTED); + } - redistribute_delete (&rn->p, fib); - if (! RIB_SYSTEM_ROUTE (fib)) - rib_uninstall_kernel (rn, fib); - UNSET_FLAG (fib->flags, ZEBRA_FLAG_SELECTED); + if (new_selected) + { + /* Install new or replace existing FIB entry */ + SET_FLAG (new_selected->flags, ZEBRA_FLAG_SELECTED); + redistribute_add (&rn->p, new_selected); + + if (! RIB_SYSTEM_ROUTE (new_selected)) + rib_update_kernel (rn, + old_selected && ! RIB_SYSTEM_ROUTE (old_selected) + ? KERNEL_ROUTE_CHANGE : KERNEL_ROUTE_ADD, + new_selected); + } - /* Set real nexthop. */ - nexthop_active_update (rn, fib, 1); + if (info->safi == SAFI_UNICAST) + zfpm_trigger_update (rn, "updating existing route"); } - - /* Regardless of some RIB entry being SELECTED or not before, now we can - * tell, that if a new winner exists, FIB is still not updated with this - * data, but ready to be. - */ - if (select) + else if (old_selected == new_selected && new_selected && ! RIB_SYSTEM_ROUTE (new_selected)) { - if (IS_ZEBRA_DEBUG_RIB) - rnode_debug (rn, "Adding route, select %p", (void *)select); - - if (info->safi == SAFI_UNICAST) - zfpm_trigger_update (rn, "new route selected"); - - /* Set real nexthop. */ - nexthop_active_update (rn, select, 1); - - if (! RIB_SYSTEM_ROUTE (select)) - rib_install_kernel (rn, select); - SET_FLAG (select->flags, ZEBRA_FLAG_SELECTED); - redistribute_add (&rn->p, select); + /* Housekeeping code to deal with race conditions in kernel with + * linux netlink reporting interface up before IPv4 or IPv6 protocol + * is ready to add routes. This makes sure routes are IN the kernel. + */ + for (ALL_NEXTHOPS_RO(new_selected->nexthop, nexthop, tnexthop, recursing)) + if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) + { + installed = 1; + break; + } + if (! 
installed) + rib_update_kernel (rn, KERNEL_ROUTE_ADD, new_selected); } - /* FIB route was removed, should be deleted */ - if (del) + /* Remove all RIB entries queued for removal */ + RNODE_FOREACH_RIB_SAFE (rn, rib, next) { - if (IS_ZEBRA_DEBUG_RIB) - rnode_debug (rn, "Deleting fib %p, rn %p", (void *)del, (void *)rn); - rib_unlink (rn, del); + if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) + { + if (IS_ZEBRA_DEBUG_RIB) + rnode_debug (rn, "rn %p, removing rib %p", + (void *)rn, (void *)rib); + rib_unlink (rn, rib); + } } -end: if (IS_ZEBRA_DEBUG_RIB_Q) rnode_debug (rn, "rn %p dequeued", (void *)rn); @@ -3294,7 +3214,7 @@ rib_sweep_table (struct route_table *table) if (rib->type == ZEBRA_ROUTE_KERNEL && CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE)) { - ret = rib_uninstall_kernel (rn, rib); + ret = rib_update_kernel (rn, KERNEL_ROUTE_DEL, rib); if (! ret) rib_delnode (rn, rib); } @@ -3361,7 +3281,7 @@ rib_close_table (struct route_table *table) zfpm_trigger_update (rn, NULL); if (! RIB_SYSTEM_ROUTE (rib)) - rib_uninstall_kernel (rn, rib); + rib_update_kernel (rn, KERNEL_ROUTE_DEL, rib); } } -- 2.4.1 _______________________________________________ Quagga-dev mailing list [email protected] https://lists.quagga.net/mailman/listinfo/quagga-dev
