Greetings!

I made an attempt to fix the bug in the spirit of the callback-based
approach suggested by Florian.  As one can see from Florian's
analysis, the problem is due to current BIRD code not being able to
handle more than one netlink "session" at once.

The patch is below.  The idea is to allow multiple outstanding netlink
sessions.  All netlink replies are now handled by nl_harvest(), which
multiplexes the replies based on nlmsg_seq, calling appropriate
handler from the nl_sessions list.

nl_harvest() is called by any function willing to get netlink reply to
its request.  However, nl_harvest processes any reply the kernel
sends, not necessarely only replies to the request sent by the caller.

For example, in the case analysed by Florian, nl_harvest called from
nl_send_route will first reap the RTM_NEWLINK/RTM_DELLINK replies to
the request sent by krt_if_scan, and only then will make it to the
NLMSG_ERROR reply expected by nl_send_route.

Thanks to Florian for doing the analysis of the problem.
Thanks to all authors of BIRD -- clean terse code, fun to hack on :)


-- sizif


On Wed, Jun 20, 2007 at 12:24:02PM +0200, Florian Lohoff wrote:
> 
> Hi,
> i debugged further and build a version with debugging symbols and
> put an assert into the code where we detect the out of sequence
> netlink message. My understanding is that we do an krt_if_scan on a
> regular basis - Now we go through the notification chain and end up
> sending another netlink message before krt_if_scan pulled all netlink
> messages. nl_get_reply gets an out-of-sequence netlink message and drops
> it although nl_get_scan would need it for an end-of-messages marker. Thus
> returning from our notification chain nl_get_scan ends up polling for the
> next message which already got removed via nl_send_route -> nl_exchange ->
> nl_get_reply as an out-of-sequence. As the netlink FD is blocking we
> deadlock here.
> 
> One solution i see would be to convert all nl_get_scan users to first
> poll ALL messages before starting to process them which could mean a lot 
> of memory usage especially on full routing BGP where we need to poll all
> routes first. 
> 
> Another solution would be to make some generic polling/callback based
> approach, or probably replacing the netlink.c with an API wrapper around
> libnl (http://people.suug.ch/~tgr/libnl/)
> 
> (gdb) bt
> #0  0xb7e3e947 in raise () from /lib/tls/libc.so.6
> #1  0xb7e400c9 in abort () from /lib/tls/libc.so.6
> #2  0xb7e3805f in __assert_fail () from /lib/tls/libc.so.6
> #3  0x08078a09 in nl_get_reply () at netlink.c:135
> #4  0x08078b0d in nl_exchange (pkt=0xbfe3f2b4) at netlink.c:193
> #5  0x08079562 in nl_send_route (p=0x80905a0, e=0x809921c, new=0) at 
> netlink.c:542
> #6  0x080795d2 in krt_set_notify (p=0x80905a0, n=0x80981c4, new=0x0, 
> old=0x809921c)
>     at netlink.c:561
> #7  0x0807614e in krt_notify (P=0x80905a0, net=0x80981c4, new=0x0, 
> old=0x809921c,
>     attrs=0x0) at krt.c:698
> #8  0x08049910 in do_rte_announce (a=0x8095e40, net=0x80981c4, new=0x80991cc,
>     old=0x809921c, tmpa=0x0, class=4100)
>     at /home/flo/p/root/bird-1.0.11/./nest/rt-table.c:227
> #9  0x08049521 in rte_announce (tab=0x808f4b0, net=0x80981c4, new=0x80991cc,
>     old=0x809921c, tmpa=0x0) at 
> /home/flo/p/root/bird-1.0.11/./nest/rt-table.c:261
> #10 0x08049b94 in rte_recalculate (table=0x808f4b0, net=0x80981c4, 
> p=0x80906c8,
>     new=0x80991cc, tmpa=0x0) at 
> /home/flo/p/root/bird-1.0.11/./nest/rt-table.c:368
> #11 0x08049ff1 in rte_update (table=0x808f4b0, net=0x80981c4, p=0x80906c8,
>     new=0x80991cc) at /home/flo/p/root/bird-1.0.11/./nest/rt-table.c:514
> #12 0x0804fd41 in dev_ifa_notify (p=0x80906c8, c=1, ad=0x8095c80)
>     at /home/flo/p/root/bird-1.0.11/./nest/rt-dev.c:69
> #13 0x0804e962 in ifa_send_notify (p=0x80906c8, c=1, a=0x8095c80)
>     at /home/flo/p/root/bird-1.0.11/./nest/iface.c:148
> #14 0x0804e87e in ifa_notify_change (c=1, a=0x8095c80)
>     at /home/flo/p/root/bird-1.0.11/./nest/iface.c:159
> #15 0x0804ea7d in if_notify_change (c=1, i=0x8095b38)
>     at /home/flo/p/root/bird-1.0.11/./nest/iface.c:211
> #16 0x0804ed24 in if_update (new=0xbfe3f5ec)
>     at /home/flo/p/root/bird-1.0.11/./nest/iface.c:280
> #17 0x08078f5c in nl_parse_link (h=0x80919b8, scan=1) at netlink.c:334
> #18 0x080792a1 in krt_if_scan (p=0x8090778) at netlink.c:445
> #19 0x08074fd5 in kif_scan (t=0x8095af8) at krt.c:94
> #20 0x08075004 in kif_force_scan () at krt.c:102
> #21 0x08076018 in krt_scan (t=0x8095a48) at krt.c:655
> #22 0x08073005 in tm_shot () at io.c:298
> #23 0x08074886 in io_loop () at io.c:1126
> #24 0x080774ef in main (argc=Cannot access memory at address 0x67ec
> ) at main.c:462 
> 
> 
> -- 
> Florian Lohoff                  [EMAIL PROTECTED]             +49-171-2280134
>       Those who would give up a little freedom to get a little 
>           security shall soon have neither - Benjamin Franklin


--- bird-1.0.11/sysdep/linux/netlink/netlink.c  2008/06/24 15:50:27     1.1
+++ bird-1.0.11/sysdep/linux/netlink/netlink.c  2008/06/24 19:38:40
@@ -49,6 +49,13 @@
 static struct nlmsghdr *nl_last_hdr;   /* Recently received packet */
 static unsigned int nl_last_size;
 
+static list nl_sessions;
+struct nl_session {
+  node n;
+  void *handler;
+  u32 seq;
+};
+
 static void
 nl_open(void)
 {
@@ -59,13 +66,15 @@
        die("Unable to open rtnetlink socket: %m");
       nl_sync_seq = now;
       nl_rx_buffer = xmalloc(NL_RX_SIZE);
+      init_list(&nl_sessions);
     }
 }
 
 static void
-nl_send(struct nlmsghdr *nh)
+nl_send(struct nlmsghdr *nh, void *handler)
 {
   struct sockaddr_nl sa;
+  struct nl_session *s = xmalloc (sizeof (struct nl_session));
 
   memset(&sa, 0, sizeof(sa));
   sa.nl_family = AF_NETLINK;
@@ -73,11 +82,14 @@
   nh->nlmsg_seq = ++nl_sync_seq;
   if (sendto(nl_sync_fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, 
sizeof(sa)) < 0)
     die("rtnetlink sendto: %m");
-  nl_last_hdr = NULL;
+
+  s->seq = nl_sync_seq;
+  s->handler = handler;
+  add_head (&nl_sessions, &s->n);
 }
 
-static void
-nl_request_dump(int cmd)
+static u32
+nl_request_dump(int cmd, void *handler)
 {
   struct {
     struct nlmsghdr nh;
@@ -87,7 +99,8 @@
   req.nh.nlmsg_len = sizeof(req);
   req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
   req.g.rtgen_family = BIRD_PF;
-  nl_send(&req.nh);
+  nl_send(&req.nh, handler);
+  return req.nh.nlmsg_seq;
 }
 
 static struct nlmsghdr *
@@ -117,12 +130,6 @@
        {
          struct nlmsghdr *h = nl_last_hdr;
          nl_last_hdr = NLMSG_NEXT(h, nl_last_size);
-         if (h->nlmsg_seq != nl_sync_seq)
-           {
-             log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet 
(%x != %x)",
-                 h->nlmsg_seq, nl_sync_seq);
-             continue;
-           }
          return h;
        }
       if (nl_last_size)
@@ -149,35 +156,43 @@
   return ec;
 }
 
-static struct nlmsghdr *
-nl_get_scan(void)
+static struct nl_session *
+nl_find_session (u32 seq)
 {
-  struct nlmsghdr *h = nl_get_reply();
+  struct nl_session *s;
 
-  if (h->nlmsg_type == NLMSG_DONE)
-    return NULL;
-  if (h->nlmsg_type == NLMSG_ERROR)
-    {
-      nl_error(h);
-      return NULL;
-    }
-  return h;
+  WALK_LIST (s, nl_sessions)
+    if (s->seq == seq)
+      return s;
+
+  return NULL;
 }
 
-static int
-nl_exchange(struct nlmsghdr *pkt)
+static void
+nl_harvest()
 {
-  struct nlmsghdr *h;
+  struct nlmsghdr *h = nl_get_reply();
+  struct nl_session *s = nl_find_session(h->nlmsg_seq);
 
-  nl_send(pkt);
-  for(;;)
+  if (!s)
     {
-      h = nl_get_reply();
-      if (h->nlmsg_type == NLMSG_ERROR)
-       break;
-      log(L_WARN "nl_exchange: Unexpected reply received");
+      log(L_WARN "nl_harvest: Unknown netlink session %x", h->nlmsg_seq);
+      return;
     }
-  return nl_error(h);
+
+  if (h->nlmsg_type == NLMSG_ERROR)
+    nl_error(h);
+  if (h->nlmsg_type == NLMSG_ERROR || h->nlmsg_type == NLMSG_DONE)
+    {
+      rem_node (&s->n);
+      free (s);
+      return;
+    }
+
+  if (s->handler)
+    ((void (*)(struct nlmsghdr *, int))s->handler) (h, 1);
+  else
+      log(L_WARN "nl_harvest: Unexpected reply received (%d)", h->nlmsg_type);
 }
 
 /*
@@ -265,6 +280,11 @@
   u32 mtu;
   unsigned int fl;
 
+  if (h->nlmsg_type != RTM_NEWLINK && h->nlmsg_type != RTM_DELLINK)
+    {
+      log(L_DEBUG "nl_parse_link: Unknown packet received (type=%d)", 
h->nlmsg_type);
+      return;
+    }
   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, 
sizeof(a)))
     return;
   if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 ||
@@ -325,6 +345,11 @@
   struct iface *ifi;
   int scope;
 
+  if (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)
+    {
+      log(L_DEBUG "nl_parse_addr: Unknown packet received (type=%d)", 
h->nlmsg_type);
+      return;
+    }
   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, 
sizeof(a)))
     return;
   if (i->ifa_family != BIRD_AF)
@@ -413,23 +438,15 @@
 void
 krt_if_scan(struct kif_proto *p UNUSED)
 {
-  struct nlmsghdr *h;
+  u32 seq;
 
   if_start_update();
 
-  nl_request_dump(RTM_GETLINK);
-  while (h = nl_get_scan())
-    if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
-      nl_parse_link(h, 1);
-    else
-      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", 
h->nlmsg_type);
-
-  nl_request_dump(RTM_GETADDR);
-  while (h = nl_get_scan())
-    if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
-      nl_parse_addr(h);
-    else
-      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", 
h->nlmsg_type);
+  seq = nl_request_dump(RTM_GETLINK, nl_parse_link);
+  do nl_harvest(); while (nl_find_session(seq));
+
+  seq = nl_request_dump(RTM_GETADDR, nl_parse_addr);
+  do nl_harvest(); while (nl_find_session(seq));
 
   if_end_update();
 }
@@ -516,7 +533,8 @@
       bug("krt_capable inconsistent with nl_send_route");
     }
 
-  nl_exchange(&r.h);
+  nl_send(&r.h, NULL);
+  do nl_harvest(); while (nl_find_session(r.h.nlmsg_seq));
 }
 
 void
@@ -575,6 +593,11 @@
   u32 oif;
   int src;
 
+  if (h->nlmsg_type != RTM_NEWROUTE && h->nlmsg_type != RTM_DELROUTE)
+    {
+      log(L_DEBUG "nl_parse_route: Unknown packet received (type=%d)", 
h->nlmsg_type);
+      return;
+    }
   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, 
sizeof(a)))
     return;
   if (i->rtm_family != BIRD_AF)
@@ -730,14 +753,10 @@
 void
 krt_scan_fire(struct krt_proto *p UNUSED)      /* CONFIG_ALL_TABLES_AT_ONCE => 
p is NULL */
 {
-  struct nlmsghdr *h;
+  u32 seq;
 
-  nl_request_dump(RTM_GETROUTE);
-  while (h = nl_get_scan())
-    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
-      nl_parse_route(h, 1);
-    else
-      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", 
h->nlmsg_type);
+  seq = nl_request_dump(RTM_GETROUTE, nl_parse_route);
+  do nl_harvest(); while (nl_find_session(seq));
 }
 
 /*





-- 
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]

Reply via email to