Hrvoje Popovski the great, found another crazy race related with NDP:

 # ifconfig vlan300 destroy
 kernel: protection fault trap, code=0
 Stopped at      nd6_ns_output+0x30:     cmpb    $0xff,0(%r13)
 ddb{0}> trace
 nd6_ns_output(ffffff0786bfe210,ffff800001434800,...) at nd6_ns_output+0x30
 nd6_llinfo_timer(ffffff0786bfe210) at nd6_llinfo_timer+0x1a8
 softclock_thread(0) at softclock_thread+0xc6
 end trace frame: 0x0, count: -3
 
What happened in this case is a complicated use-after-free.  The 'softclock'
thread was sleeping on the NET_LOCK() trying to execute the already schedule
per-ND timer, while the route entry was freed by ifconfig(8).

The problems is in nd6_rtrequest(). For the RTM_DELETE case, we do not
check if timeout_del(9) did not remove anything:

970:            TAILQ_REMOVE(&nd6_list, ln, ln_list);
971:            nd6_llinfo_settimer(ln, -1);


There are multiple ways to fix this problem:

 1/ Use the new proposed API timeout_barrier(9) from dlg@.  However this
    introduces a new sleeping point and requires a NET_LOCK()/UNLOCK()
    dance.

 2/ Refcount `ln' or abuse the `rt' refcounting.  But I'd prefer to
    reduce the number of references.

 3/ Move the timeout outside of the short lived structure.

Diff below implements 3/ because it seems the simplest approach to
me and reduce differences with ARP a bit further.  This pave the way
for further MP in the L2 layer.  The idea is that a single global list
will have to be protected and no refcounting should be necessary.

Briefly tested here, I appreciate more tests and reviews.

Index: netinet6/nd6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.221
diff -u -p -r1.221 nd6.c
--- netinet6/nd6.c      31 Oct 2017 22:05:13 -0000      1.221
+++ netinet6/nd6.c      22 Nov 2017 15:01:05 -0000
@@ -67,7 +67,8 @@
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
 /* timer values */
-time_t nd6_expire_time = -1;   /* at which time_uptime nd6_expire runs */
+int    nd6_timer_next  = -1;   /* at which time_uptime nd6_timer runs */
+time_t nd6_expire_next = -1;   /* at which time_uptime nd6_expire runs */
 int    nd6_delay       = 5;    /* delay first probe time 5 second */
 int    nd6_umaxtries   = 3;    /* maximum unicast query */
 int    nd6_mmaxtries   = 3;    /* maximum multicast query */
@@ -90,13 +91,15 @@ int nd6_inuse, nd6_allocated;
 
 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
 
+void nd6_timer(void *);
 void nd6_slowtimo(void *);
 void nd6_expire(void *);
 void nd6_expire_timer(void *);
 void nd6_invalidate(struct rtentry *);
 void nd6_free(struct rtentry *);
-void nd6_llinfo_timer(void *);
+void nd6_llinfo_timer(struct rtentry *);
 
+struct timeout nd6_timer_to;
 struct timeout nd6_slowtimo_ch;
 struct timeout nd6_expire_timeout;
 struct task nd6_expire_task;
@@ -120,6 +123,7 @@ nd6_init(void)
        nd6_init_done = 1;
 
        /* start timer */
+       timeout_set_proc(&nd6_timer_to, nd6_timer, &nd6_timer_to);
        timeout_set_proc(&nd6_slowtimo_ch, nd6_slowtimo, NULL);
        timeout_add_sec(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL);
        timeout_set(&nd6_expire_timeout, nd6_expire_timer, NULL);
@@ -297,45 +301,57 @@ skip1:
  * ND6 timer routine to handle ND6 entries
  */
 void
-nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
+nd6_llinfo_settimer(struct llinfo_nd6 *ln, unsigned int secs)
 {
-       if (secs < 0) {
-               ln->ln_rt->rt_expire = 0;
-               timeout_del(&ln->ln_timer_ch);
-       } else {
-               ln->ln_rt->rt_expire = time_uptime + secs;
-               timeout_add_sec(&ln->ln_timer_ch, secs);
+       time_t expire = time_uptime + secs;
+
+       ln->ln_rt->rt_expire = expire;
+       if (!timeout_pending(&nd6_timer_to) || expire < nd6_timer_next) {
+               nd6_timer_next = expire;
+               timeout_add_sec(&nd6_timer_to, secs);
        }
 }
 
 void
-nd6_llinfo_timer(void *arg)
+nd6_timer(void *arg)
 {
-       struct llinfo_nd6 *ln;
-       struct rtentry *rt;
-       struct sockaddr_in6 *dst;
-       struct ifnet *ifp;
-       struct nd_ifinfo *ndi = NULL;
+       struct llinfo_nd6 *ln, *nln;
+       time_t expire = time_uptime + nd6_gctimer;
+       int secs;
 
        NET_LOCK();
+       TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
+               struct rtentry *rt = ln->ln_rt;
 
-       ln = (struct llinfo_nd6 *)arg;
+               if (rt->rt_expire && rt->rt_expire <= time_uptime)
+                       nd6_llinfo_timer(rt);
 
-       if ((rt = ln->ln_rt) == NULL)
-               panic("ln->ln_rt == NULL");
-       if ((ifp = if_get(rt->rt_ifidx)) == NULL) {
-               NET_UNLOCK();
-               return;
+               if (rt->rt_expire && rt->rt_expire < expire)
+                       expire = rt->rt_expire;
        }
-       ndi = ND_IFINFO(ifp);
-       dst = satosin6(rt_key(rt));
 
-       /* sanity check */
-       if (rt->rt_llinfo != NULL && (struct llinfo_nd6 *)rt->rt_llinfo != ln)
-               panic("rt_llinfo(%p) is not equal to ln(%p)",
-                     rt->rt_llinfo, ln);
-       if (!dst)
-               panic("dst=0 in nd6_timer(ln=%p)", ln);
+       secs = expire - time_uptime;
+       if (secs < 0)
+               secs = 0;
+       if (!TAILQ_EMPTY(&nd6_list))
+               timeout_add_sec(&nd6_timer_to, secs);
+
+       NET_UNLOCK();
+}
+
+void
+nd6_llinfo_timer(struct rtentry *rt)
+{
+       struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+       struct sockaddr_in6 *dst = satosin6(rt_key(rt));
+       struct ifnet *ifp;
+       struct nd_ifinfo *ndi = NULL;
+
+       NET_ASSERT_LOCKED();
+
+       if ((ifp = if_get(rt->rt_ifidx)) == NULL)
+               return;
+       ndi = ND_IFINFO(ifp);
 
        switch (ln->ln_state) {
        case ND6_LLINFO_INCOMPLETE:
@@ -408,7 +424,6 @@ nd6_llinfo_timer(void *arg)
        }
 
        if_put(ifp);
-       NET_UNLOCK();
 }
 
 void
@@ -436,16 +451,16 @@ nd6_expire_timer_update(struct in6_ifadd
         * Schedule timeout one second later so that either IFA6_IS_INVALID()
         * or IFA6_IS_DEPRECATED() is true.
         */
-       expire_time++; 
+       expire_time++;
 
-       if (!timeout_pending(&nd6_expire_timeout) || nd6_expire_time >
-           expire_time) {
+       if (!timeout_pending(&nd6_expire_timeout) ||
+           nd6_expire_next > expire_time) {
                secs = expire_time - time_uptime;
-               if ( secs < 0)
+               if (secs < 0)
                        secs = 0;
 
                timeout_add_sec(&nd6_expire_timeout, secs);
-               nd6_expire_time = expire_time;
+               nd6_expire_next = expire_time;
        }
 }
 
@@ -852,7 +867,6 @@ nd6_rtrequest(struct ifnet *ifp, int req
                nd6_inuse++;
                nd6_allocated++;
                ln->ln_rt = rt;
-               timeout_set_proc(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
                /* this is required for "ndp" command. - shin */
                if (req == RTM_ADD) {
                        /*
@@ -913,14 +927,14 @@ nd6_rtrequest(struct ifnet *ifp, int req
                ifa = &in6ifa_ifpwithaddr(ifp,
                    &satosin6(rt_key(rt))->sin6_addr)->ia_ifa;
                if (ifa) {
-                       nd6_llinfo_settimer(ln, -1);
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        ln->ln_byhint = 0;
+                       rt->rt_expire = 0;
                        KASSERT(ifa == rt->rt_ifa);
                } else if (rt->rt_flags & RTF_ANNOUNCE) {
-                       nd6_llinfo_settimer(ln, -1);
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        ln->ln_byhint = 0;
+                       rt->rt_expire = 0;
 
                        /* join solicited node multicast for proxy ND */
                        if (ifp->if_flags & IFF_MULTICAST) {
@@ -968,7 +982,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                }
                nd6_inuse--;
                TAILQ_REMOVE(&nd6_list, ln, ln_list);
-               nd6_llinfo_settimer(ln, -1);
+               rt->rt_expire = 0;
                rt->rt_llinfo = NULL;
                rt->rt_flags &= ~RTF_LLINFO;
                m_freem(ln->ln_hold);
@@ -1195,7 +1209,7 @@ fail:
                        }
                } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
                        /* probe right away */
-                       nd6_llinfo_settimer((void *)ln, 0);
+                       nd6_llinfo_settimer(ln, 0);
                }
        }
 
Index: netinet6/nd6.h
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6.h,v
retrieving revision 1.73
diff -u -p -r1.73 nd6.h
--- netinet6/nd6.h      3 Nov 2017 14:28:57 -0000       1.73
+++ netinet6/nd6.h      22 Nov 2017 11:18:16 -0000
@@ -97,7 +97,6 @@ struct        in6_ndifreq {
 #ifdef _KERNEL
 
 #include <sys/queue.h>
-#include <sys/timeout.h>
 
 #define ND_IFINFO(ifp) \
        (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo)
@@ -113,8 +112,6 @@ struct      llinfo_nd6 {
        int     ln_byhint;      /* # of times we made it reachable by UL hint */
        short   ln_state;       /* reachability state */
        short   ln_router;      /* 2^0: ND6 router bit */
-
-       struct  timeout ln_timer_ch;
 };
 
 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
@@ -173,7 +170,7 @@ struct nd_opt_hdr *nd6_option(union nd_o
 int nd6_options(union nd_opts *);
 struct rtentry *nd6_lookup(struct in6_addr *, int, struct ifnet *, u_int);
 void nd6_setmtu(struct ifnet *);
-void nd6_llinfo_settimer(struct llinfo_nd6 *, int);
+void nd6_llinfo_settimer(struct llinfo_nd6 *, unsigned int);
 void nd6_purge(struct ifnet *);
 void nd6_nud_hint(struct rtentry *);
 void nd6_rtrequest(struct ifnet *, int, struct rtentry *);

Reply via email to