Rework how route table is read to use netlink interface.
This solved major performance problems when using SNMP on full gateway
table (>256K routes).

The old code read /proc/net/route and then looked up the mapping from
name "eth0" to ifindex for each entry. Much more efficient to use netlink
to dump route table which already gives ifindex and scales better.

Netlink has been supported all the way back to early 2.4 Linux.
---
Patch against current svn trunk.

--- a/agent/mibgroup/ip-forward-mib/data_access/route_linux.c   2008-09-04 
11:48:18.000000000 -0700
+++ b/agent/mibgroup/ip-forward-mib/data_access/route_linux.c   2008-09-04 
12:23:15.000000000 -0700
@@ -16,147 +16,119 @@
 #include "ip-forward-mib/inetCidrRouteTable/inetCidrRouteTable_constants.h"
 #include "if-mib/data_access/interface_ioctl.h"
 
-static int
-_type_from_flags(unsigned int flags)
-{
-    /*
-     *  RTF_GATEWAY RTF_UP RTF_DYNAMIC RTF_CACHE
-     *  RTF_MODIFIED RTF_EXPIRES RTF_NONEXTHOP
-     *  RTF_DYNAMIC RTF_LOCAL RTF_PREFIX_RT
-     *
-     * xxx: can we distinguish between reject & blackhole?
-     */
-    if (flags & RTF_UP) {
-        if (flags & RTF_GATEWAY)
-            return INETCIDRROUTETYPE_REMOTE;
-        else /*if (flags & RTF_LOCAL) */
-            return INETCIDRROUTETYPE_LOCAL;
-    } else 
-        return 0; /* route not up */
+#include <errno.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 
-}
-static int
-_load_ipv4(netsnmp_container* container, u_long *index )
-{
-    FILE           *in;
-    char            line[256];
-    netsnmp_route_entry *entry = NULL;
-    char            name[16];
-    int             fd;
+/*
+ * Get routing table via netlink
+ */
 
-    DEBUGMSGTL(("access:route:container",
-                "route_container_arch_load ipv4\n"));
+static void
+_netlink_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int 
len)
+{
+    memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+    while (RTA_OK(rta, len)) {
+       if (rta->rta_type <= max)
+           tb[rta->rta_type] = rta;
+       rta = RTA_NEXT(rta,len);
+    }
+}
 
-    netsnmp_assert(NULL != container);
 
-    /*
-     * fetch routes from the proc file-system:
-     */
-    if (!(in = fopen("/proc/net/route", "r"))) {
-        snmp_log(LOG_ERR, "cannot open /proc/net/route\n");
-        return -2;
+static int
+_get_route(struct nlmsghdr *n, void *arg1, void *arg2)
+{
+    struct rtmsg *r = NLMSG_DATA(n);
+    netsnmp_container* container = arg1;
+    u_long *index = arg2;
+    netsnmp_route_entry *entry = NULL;
+    int len = n->nlmsg_len;
+    struct rtattr * tb[RTA_MAX+1];
+    void *dest;
+    void *gate;
+    void *src = NULL;
+    char anyaddr[16] = { 0 };
+
+    if (n->nlmsg_type != RTM_NEWROUTE) {
+       snmp_log(LOG_ERR, "netlink got wrong type %d response to get route\n",
+                n->nlmsg_type);
+       return -1;
     }
 
-    /*
-     * create socket for ioctls (see NOTE[1], below)
-     */
-    fd = socket(AF_INET, SOCK_DGRAM, 0);
-    if(fd < 0) {
-        snmp_log(LOG_ERR, "could not create socket\n");
-        fclose(in);
-        return -2;
+    len -= NLMSG_LENGTH(sizeof(*r));
+    if (len < 0) {
+       snmp_log(LOG_ERR, "netlink got truncated response to get route\n");
+       return -1;
     }
 
-    fgets(line, sizeof(line), in); /* skip header */
-
-    while (fgets(line, sizeof(line), in)) {
-        char            rtent_name[32];
-        int             refcnt, flags, rc;
-        uint32_t        dest, nexthop, mask, tmp_mask;
-        unsigned        use;
+    if (r->rtm_flags & RTM_F_CLONED)
+       return 0;
 
-        entry = netsnmp_access_route_entry_create();
+    _netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);
+    if (r->rtm_type != RTN_UNICAST)
+       return 0;
 
-        /*
-         * as with 1.99.14:
-         *    Iface Dest     GW       Flags RefCnt Use Met Mask     MTU  Win 
IRTT
-         * BE eth0  00000000 C0A80101 0003  0      0   0   FFFFFFFF 1500 0   0 
-         * LE eth0  00000000 0101A8C0 0003  0      0   0   00FFFFFF    0 0   0 
 
-         */
-        rc = sscanf(line, "%s %x %x %x %u %d %d %x %*d %*d %*d\n",
-                    rtent_name, &dest, &nexthop,
-                    /*
-                     * XXX: fix type of the args 
-                     */
-                    &flags, &refcnt, &use, &entry->rt_metric1,
-                    &tmp_mask);
-        DEBUGMSGTL(("9:access:route:container", "line |%s|\n", line));
-        if (8 != rc) {
-            snmp_log(LOG_ERR,
-                     "/proc/net/route data format error (%d!=8), line ==|%s|",
-                     rc, line);
-            
-            netsnmp_access_route_entry_free(entry);        
-            continue;
-        }
-
-        /*
-         * temporary null terminated name
-         */
-        strncpy(name, rtent_name, sizeof(name));
-        name[ sizeof(name)-1 ] = 0;
-
-        /*
-         * don't bother to try and get the ifindex for routes with
-         * no interface name.
-         * NOTE[1]: normally we'd use netsnmp_access_interface_index_find,
-         * but since that will open/close a socket, and we might
-         * have a lot of routes, call the ioctl routine directly.
-         */
-        if ('*' != name[0])
-            entry->if_index =
-                netsnmp_access_interface_ioctl_ifindex_get(fd,name);
+    entry = netsnmp_access_route_entry_create();
+    /*
+     * arbitrary index
+     */
+    entry->ns_rt_index = ++(*index);
 
-        /*
-         * arbitrary index
-         */
-        entry->ns_rt_index = ++(*index);
+    if (tb[RTA_OIF])
+       entry->if_index = *(unsigned int*)RTA_DATA(tb[RTA_OIF]);
+    else
+       entry->if_index = 0;
+
+    if (tb[RTA_DST])
+       dest = RTA_DATA(tb[RTA_DST]);
+    else
+       dest = anyaddr;
+
+    if (tb[RTA_PREFSRC])
+       src = RTA_DATA(tb[RTA_PREFSRC]);
+
+    if (tb[RTA_GATEWAY]) {
+       gate = RTA_DATA(tb[RTA_GATEWAY]);
+       entry->rt_type = INETCIDRROUTETYPE_REMOTE;
+    } else {
+       entry->rt_type = INETCIDRROUTETYPE_LOCAL;
+       gate = anyaddr;
+    }
 
-        mask = htonl(tmp_mask);
+    entry->rt_proto = (r->rtm_protocol == RTPROT_REDIRECT)
+       ? IANAIPROUTEPROTOCOL_ICMP : IANAIPROUTEPROTOCOL_LOCAL;
+
+    if (tb[RTA_PRIORITY])
+       entry->rt_metric1 = *(int *) RTA_DATA(tb[RTA_PRIORITY]);
 
+    if (r->rtm_family == AF_INET) {
+       entry->rt_pfx_len = r->rtm_dst_len;
 #ifdef USING_IP_FORWARD_MIB_IPCIDRROUTETABLE_IPCIDRROUTETABLE_MODULE
-        entry->rt_mask = mask;
+       entry->rt_mask = ~0 << r->rtm_dst_len;
         /** entry->rt_tos = XXX; */
         /** rt info ?? */
 #endif
-        /*
-         * copy dest & next hop
-         */
         entry->rt_dest_type = INETADDRESSTYPE_IPV4;
         entry->rt_dest_len = 4;
-        memcpy(entry->rt_dest, &dest, 4);
+        memcpy(entry->rt_dest, dest, 4);
 
-        entry->rt_nexthop_type = INETADDRESSTYPE_IPV4;
+       entry->rt_nexthop_type = INETADDRESSTYPE_IPV4;
         entry->rt_nexthop_len = 4;
-        memcpy(entry->rt_nexthop, &nexthop, 4);
-
-        /*
-         * count bits in mask
-         */
-        entry->rt_pfx_len = netsnmp_ipaddress_ipv4_prefix_len(mask);
+        memcpy(entry->rt_nexthop, gate, 4);
 
 #ifdef USING_IP_FORWARD_MIB_INETCIDRROUTETABLE_INETCIDRROUTETABLE_MODULE
         /*
-    inetCidrRoutePolicy OBJECT-TYPE 
-        SYNTAX     OBJECT IDENTIFIER 
-        MAX-ACCESS not-accessible 
-        STATUS     current 
-        DESCRIPTION 
-               "This object is an opaque object without any defined 
-                semantics.  Its purpose is to serve as an additional 
-                index which may delineate between multiple entries to 
-                the same destination.  The value { 0 0 } shall be used 
-                as the default value for this object."
+         inetCidrRoutePolicy OBJECT-TYPE
+         SYNTAX     OBJECT IDENTIFIER
+         MAX-ACCESS not-accessible
+         STATUS     current
+         DESCRIPTION
+         "This object is an opaque object without any defined
+         semantics.  Its purpose is to serve as an additional
+         index which may delineate between multiple entries to
+         the same destination.  The value { 0 0 } shall be used
+         as the default value for this object."
         */
         /*
          * on linux, default routes all look alike, and would have the same
@@ -166,186 +138,217 @@ _load_ipv4(netsnmp_container* container,
          * xxx-rks: It should really only be for the duplicate case, but that
          *     would be more complicated thanI want to get into now. Fix later.
          */
-        if (0 == nexthop) {
+        if (dest == anyaddr) {
             entry->rt_policy = &entry->if_index;
             entry->rt_policy_len = 1;
             entry->flags |= NETSNMP_ACCESS_ROUTE_POLICY_STATIC;
         }
 #endif
+    }
+#ifdef NETSNMP_ENABLE_IPV6
+    if (r->rtm_family == AF_INET6) {
+        entry->rt_pfx_len = r->rtm_dst_len;
+        entry->rt_dest_type = INETADDRESSTYPE_IPV6;
+        entry->rt_dest_len = 16;
+       memcpy(entry->rt_dest, dest, 16);
 
-        /*
-         * get protocol and type from flags
-         */
-        entry->rt_type = _type_from_flags(flags);
-        
-        entry->rt_proto = (flags & RTF_DYNAMIC)
-            ? IANAIPROUTEPROTOCOL_ICMP : IANAIPROUTEPROTOCOL_LOCAL;
+        entry->rt_nexthop_type = INETADDRESSTYPE_IPV6;
+        entry->rt_nexthop_len = 16;
+       memcpy(entry->rt_nexthop, gate, 16);
 
+#ifdef USING_IP_FORWARD_MIB_INETCIDRROUTETABLE_INETCIDRROUTETABLE_MODULE
+        /*
+         inetCidrRoutePolicy OBJECT-TYPE
+         SYNTAX     OBJECT IDENTIFIER
+         MAX-ACCESS not-accessible
+         STATUS     current
+         DESCRIPTION
+         "This object is an opaque object without any defined
+         semantics.  Its purpose is to serve as an additional
+         index which may delineate between multiple entries to
+         the same destination.  The value { 0 0 } shall be used
+         as the default value for this object."
+        */
         /*
-         * insert into container
+         * on linux, default routes all look alike, and would have the same
+         * indexed based on dest and next hop. So we use our arbitrary index
+         * as the policy, to distinguise between them.
          */
-        if (CONTAINER_INSERT(container, entry) < 0)
-        {
-            DEBUGMSGTL(("access:route:container", "error with route_entry: 
insert into container failed.\n"));
-            netsnmp_access_route_entry_free(entry);
-            continue;
-        }
+        entry->rt_policy = &entry->ns_rt_index;
+        entry->rt_policy_len = 1;
+        entry->flags |= NETSNMP_ACCESS_ROUTE_POLICY_STATIC;
+#endif
     }
+#endif
 
-    fclose(in);
-    close(fd);
+    /*
+     * insert into container
+     */
+    if (CONTAINER_INSERT(container, entry) < 0) {
+       DEBUGMSGTL(("access:route:container", "error with route_entry: insert 
into container failed.\n"));
+       netsnmp_access_route_entry_free(entry);
+
+    }
     return 0;
 }
 
-#ifdef NETSNMP_ENABLE_IPV6
 static int
-_load_ipv6(netsnmp_container* container, u_long *index )
+_netlink_open(int protocol)
 {
-    FILE           *in;
-    char            line[256];
-    netsnmp_route_entry *entry = NULL;
-    char            name[16];
-    static int      log_open_err = 1;
+    int fd;
+    int rcvbuf = 32768;
+    struct sockaddr_nl snl = { .nl_family = AF_NETLINK };
+
+    fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+    if (fd < 0) {
+       snmp_log_perror("Cannot open netlink socket");
+       return -1;
+    }
+
+    /* increase default rx buffer for performance */
+    if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0) {
+       snmp_log_perror("netlink SO_RCVBUF");
+       close(fd);
+       return -1;
+    }
 
-    DEBUGMSGTL(("access:route:container",
-                "route_container_arch_load ipv6\n"));
+    if (bind(fd, (struct sockaddr*)&snl, sizeof(snl)) < 0) {
+       snmp_log_perror("Cannot bind netlink socket");
+       close(fd);
+       return -1;
+    }
 
-    netsnmp_assert(NULL != container);
+    return fd;
+}
 
-    /*
-     * fetch routes from the proc file-system:
-     */
-    if (!(in = fopen("/proc/net/ipv6_route", "r"))) {
-        if (1 == log_open_err) {
-            snmp_log(LOG_ERR, "cannot open /proc/net/ipv6_route\n");
-            log_open_err = 0;
-        }
-        return -2;
-    }
-    /*
-     * if we turned off logging of open errors, turn it back on now that
-     * we have been able to open the file.
-     */
-    if (0 == log_open_err)
-        log_open_err = 1;
-    fgets(line,sizeof(line),in); /* skip header */
-    while (fgets(line, sizeof(line), in)) {
-        char            c_name[IFNAMSIZ+1];
-        char            c_dest[33], c_src[33], c_next[33];
-        int             rc;
-        unsigned int    dest_pfx, flags;
-        size_t          buf_len, buf_offset;
-        u_char          *temp_uchar_ptr;
+static int
+_netlink_dump_request(int fd, int family, int type)
+{
+    struct {
+       struct nlmsghdr nlh;
+       struct rtgenmsg g;
+    } req = {
+       .nlh = {
+           .nlmsg_len  = sizeof(req),
+           .nlmsg_type = type,
+           .nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST,
+           .nlmsg_seq = time(NULL),
+       },
+       .g = { .rtgen_family = family },
+    };
 
-        entry = netsnmp_access_route_entry_create();
+    return send(fd, &req, sizeof(req), 0);
+}
 
-        /*
-         * based on /usr/src/linux/net/ipv6/route.c, kernel 2.6.7:
-         *
-         * [        Dest addr /         plen ]
-         * fe80000000000000025056fffec00008 80 \
-         *
-         * [ (?subtree) : src addr/plen : 0/0]
-         * 00000000000000000000000000000000 00 \
-         *
-         * [        next hop              ][ metric ][ref ctn][ use   ]
-         * 00000000000000000000000000000000 00000000 00000000 00000000 \
-         *
-         * [ flags ][dev name]
-         * 80200001       lo
-         */
-        rc = sscanf(line, "%32s %2x %32s %*x %32s %x %*x %*x %x %"
-                    SNMP_MACRO_VAL_TO_STR(IFNAMSIZ) "s\n",
-                    c_dest, &dest_pfx, c_src, /*src_pfx,*/ c_next,
-                    &entry->rt_metric1, /** ref,*/ /* use, */ &flags, c_name);
-        DEBUGMSGTL(("9:access:route:container", "line |%s|\n", line));
-        if (7 != rc) {
-            snmp_log(LOG_ERR,
-                     "/proc/net/ipv6_route data format error (%d!=8), "
-                     "line ==|%s|", rc, line);
-            continue;
-        }
+static int
+_netlink_dump_filter(int fd,
+                    int (*filter)(struct nlmsghdr *, void *, void *),
+                    void *arg1, void *arg2)
+{
+    int status;
+    char buf[16384];
 
-        /*
-         * temporary null terminated name
-         */
-        c_name[ sizeof(c_name)-1 ] = 0;
-        entry->if_index = se_find_value_in_slist("interfaces", c_name);
-        if(SE_DNE == entry->if_index) {
-            snmp_log(LOG_ERR,"unknown interface in /proc/net/ipv6_route "
-                     "('%s')\n", name);
-            netsnmp_access_route_entry_free(entry);
-            continue;
-        }
-        /*
-         * arbitrary index
-         */
-        entry->ns_rt_index = ++(*index);
+    do {
+       struct nlmsghdr *h;
 
-#ifdef USING_IP_FORWARD_MIB_IPCIDRROUTETABLE_IPCIDRROUTETABLE_MODULE
-        /** entry->rt_mask = mask; */ /* IPv4 only */
-        /** entry->rt_tos = XXX; */
-        /** rt info ?? */
-#endif
-        /*
-         * convert hex addresses to binary
-         */
-        entry->rt_dest_type = INETADDRESSTYPE_IPV6;
-        entry->rt_dest_len = 16;
-        buf_len = sizeof(entry->rt_dest);
-        buf_offset = 0;
-        temp_uchar_ptr = entry->rt_dest;
-        netsnmp_hex_to_binary(&temp_uchar_ptr, &buf_len, &buf_offset, 0,
-                              c_dest, NULL);
+       status = recv(fd, buf, sizeof(buf), 0);
+       if (status < 0) {
+           if (errno == EINTR || errno == EAGAIN)
+               continue;
+
+           snmp_log_perror("netlink recv");
+           return -1;
+       }
+
+       if (status == 0) {
+           snmp_log(LOG_ERR, "EOF on netlink\n");
+           return -1;
+       }
+
+       for (h = (struct nlmsghdr*)buf;
+            NLMSG_OK(h, status);
+            h = NLMSG_NEXT(h, status)) {
+           int rc;
+
+           if (h->nlmsg_type == NLMSG_DONE)
+               return 0;
+
+           if (h->nlmsg_type == NLMSG_ERROR) {
+               struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+               if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                   snmp_log(LOG_ERR, "truncated message %u < %lu\n",
+                            h->nlmsg_len, NLMSG_LENGTH(sizeof(struct 
nlmsgerr)));
+               } else {
+                   errno = -err->error;
+                   snmp_log_perror("RTNETLINK answers");
+               }
+               return -1;
+           }
+
+           rc = filter(h, arg1, arg2);
+           if (rc < 0)
+               return rc;
 
-        entry->rt_nexthop_type = INETADDRESSTYPE_IPV6;
-        entry->rt_nexthop_len = 16;
-        buf_len = sizeof(entry->rt_nexthop);
-        buf_offset = 0;
-        temp_uchar_ptr = entry->rt_nexthop;
-        netsnmp_hex_to_binary(&temp_uchar_ptr, &buf_len, &buf_offset, 0,
-                              c_next, NULL);
+       }
 
-        entry->rt_pfx_len = dest_pfx;
+    } while (status == 0);
 
-#ifdef USING_IP_FORWARD_MIB_INETCIDRROUTETABLE_INETCIDRROUTETABLE_MODULE
-        /*
-    inetCidrRoutePolicy OBJECT-TYPE 
-        SYNTAX     OBJECT IDENTIFIER 
-        MAX-ACCESS not-accessible 
-        STATUS     current 
-        DESCRIPTION 
-               "This object is an opaque object without any defined 
-                semantics.  Its purpose is to serve as an additional 
-                index which may delineate between multiple entries to 
-                the same destination.  The value { 0 0 } shall be used 
-                as the default value for this object."
-        */
-        /*
-         * on linux, default routes all look alike, and would have the same
-         * indexed based on dest and next hop. So we use our arbitrary index
-         * as the policy, to distinguise between them.
-         */
-        entry->rt_policy = &entry->ns_rt_index;
-        entry->rt_policy_len = 1;
-        entry->flags |= NETSNMP_ACCESS_ROUTE_POLICY_STATIC;
-#endif
+    snmp_log(LOG_ERR, "!!!Remnant of size %d\n", status);
+    return -1;
+}
 
-        /*
-         * get protocol and type from flags
-         */
-        entry->rt_type = _type_from_flags(flags);
-        
-        entry->rt_proto = (flags & RTF_DYNAMIC)
-            ? IANAIPROUTEPROTOCOL_ICMP : IANAIPROUTEPROTOCOL_LOCAL;
 
-        /*
-         * insert into container
-         */
-        CONTAINER_INSERT(container, entry);
+static int
+_load_ipv4(netsnmp_container* container, u_long *index )
+{
+    int             fd;
+
+    DEBUGMSGTL(("access:route:container",
+               "route_container_arch_load ipv4\n"));
+
+    netsnmp_assert(NULL != container);
+
+    fd = _netlink_open(NETLINK_ROUTE);
+    if (fd < 0)
+       return -1;
+
+    if (_netlink_dump_request(fd, AF_INET, RTM_GETROUTE) < 0) {
+       snmp_log_perror("netlink send");
+       close(fd);
+       return -2;
+    }
+
+    if (_netlink_dump_filter(fd, _get_route, container, index) < 0) {
+       close(fd);
+       return -3;
+    }
+
+    close(fd);
+    return 0;
+}
+
+#ifdef NETSNMP_ENABLE_IPV6
+static int
+_load_ipv6(netsnmp_container* container, u_long *index )
+{
+    int fd;
+
+    fd = _netlink_open(NETLINK_ROUTE);
+    if (fd < 0)
+       return -1;
+
+    if (_netlink_dump_request(fd, AF_INET6, RTM_GETROUTE) < 0) {
+       snmp_log_perror("netlink send");
+       close(fd);
+       return -2;
+    }
+
+    if (_netlink_dump_filter(fd, _get_route, container, index) < 0) {
+       close(fd);
+       return -3;
     }
 
-    fclose(in);
+    close(fd);
     return 0;
 }
 #endif

-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Net-snmp-coders mailing list
Net-snmp-coders@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/net-snmp-coders

Reply via email to