[moving to tech@]

On 26/01/17(Thu) 00:15, Adam Wolk wrote:
> One of the diff from testing had this guard in place:
[...]
> Index: if_athn_usb.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_athn_usb.c,v
> retrieving revision 1.42
> diff -u -p -r1.42 if_athn_usb.c
> --- if_athn_usb.c       11 Dec 2015 16:07:02 -0000      1.42
> +++ if_athn_usb.c       4 Sep 2016 18:48:14 -0000
> @@ -2098,13 +2098,17 @@ void
>  athn_usb_watchdog(struct ifnet *ifp)
>  {
>         struct athn_softc *sc = ifp->if_softc;
> +       struct ieee80211com *ic = &sc->sc_ic;
> 
>         ifp->if_timer = 0;
> 
>         if (sc->sc_tx_timer > 0) {
>                 if (--sc->sc_tx_timer == 0) {
>                         printf("%s: device timeout\n", sc->sc_dev.dv_xname);
> -                       /* athn_usb_init(ifp); XXX needs a process context! */
> +                       if (ic->ic_bss == NULL)
> +                               return;
> +                       athn_usb_stop(ifp);
> +                       athn_usb_init(ifp);
>                         ifp->if_oerrors++;
>                         return;
>                 }
> 
> 
> the ic->ic_bss being null doing stop resulted in further crashing. Though it 
> was

This diff cannot be better than the other one.  Let me explain what the
problem is.

When an interface is being detached, the detaching thread waits in
if_detach() until all if_get(9) references are release.  The problem
is that wireless driver call:

        ieee80211_ifdetach(ifp);
        if_detach(ifp);

So when the detaching thread is waiting for if_get(9) references it is
no longer safe to dereference wireless data structures. 

That's why you added the (ic->ic_bss == NULL) check.  But to be correct
the driver should do such check every time it comes back from sleeping.
This is not possible since you'd have to go deep in the USB layer.

What I suggested at Cambridge is to move part of the if_detach() code
into an if_barrier() function and call it like the diff below does.

Now this needs conversion and testing on multiple interfaces to make
sure it is safe and we can convert all the tree.  But as long as we
call if_barrier() first in an interface detach routing it will be safe
to free any memory afterwards.

Index: dev/ic/rtwn.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/rtwn.c,v
retrieving revision 1.11
diff -u -p -r1.11 rtwn.c
--- dev/ic/rtwn.c       8 Jan 2017 05:48:27 -0000       1.11
+++ dev/ic/rtwn.c       25 Jan 2017 23:50:58 -0000
@@ -262,8 +262,9 @@ rtwn_detach(struct rtwn_softc *sc, int f
        task_del(systq, &sc->init_task);
 
        if (ifp->if_softc != NULL) {
+               if_barrier(ifp);
                ieee80211_ifdetach(ifp);
-               if_detach(ifp);
+               if_detach2(ifp);
        }
 
        splx(s);
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.482
diff -u -p -r1.482 if.c
--- net/if.c    25 Jan 2017 21:59:41 -0000      1.482
+++ net/if.c    25 Jan 2017 23:57:13 -0000
@@ -999,26 +999,18 @@ if_deactivate(struct ifnet *ifp)
        splx(s);
 }
 
-/*
- * Detach an interface from everything in the kernel.  Also deallocate
- * private resources.
- */
 void
-if_detach(struct ifnet *ifp)
+if_barrier(struct ifnet *ifp)
 {
-       struct ifaddr *ifa;
-       struct ifg_list *ifg;
-       struct domain *dp;
-       int i, s, s2;
+       int s;
+
+       NET_LOCK(s);
 
        /* Undo pseudo-driver changes. */
        if_deactivate(ifp);
 
        ifq_clr_oactive(&ifp->if_snd);
 
-       NET_LOCK(s);
-       s2 = splnet();
-       /* Other CPUs must not have a reference before we start destroying. */
        if_idxmap_remove(ifp);
 
        ifp->if_qstart = if_detached_qstart;
@@ -1033,9 +1025,36 @@ if_detach(struct ifnet *ifp)
        timeout_del(ifp->if_slowtimo);
        task_del(systq, ifp->if_watchdogtask);
 
-       /* Remove the link state task */
        task_del(systq, ifp->if_linkstatetask);
 
+       NET_UNLOCK(s);
+}
+
+/*
+ * Detach an interface from everything in the kernel.  Also deallocate
+ * private resources.
+ */
+void
+if_detach(struct ifnet *ifp)
+{
+       int s;
+
+       s = splnet();
+       if_barrier(ifp);
+       if_detach2(ifp);
+       splx(s);
+}
+
+void
+if_detach2(struct ifnet *ifp)
+{
+       struct ifaddr *ifa;
+       struct ifg_list *ifg;
+       struct domain *dp;
+       int i, s;
+
+       NET_LOCK(s);
+
 #if NBPFILTER > 0
        bpfdetach(ifp);
 #endif
@@ -1092,7 +1111,6 @@ if_detach(struct ifnet *ifp)
 
        /* Announce that the interface is gone. */
        rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
-       splx(s2);
        NET_UNLOCK(s);
 
        for (i = 0; i < ifp->if_nifqs; i++)
Index: net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.186
diff -u -p -r1.186 if.h
--- net/if.h    24 Jan 2017 10:08:30 -0000      1.186
+++ net/if.h    25 Jan 2017 23:57:13 -0000
@@ -467,7 +467,9 @@ void        if_attach_ifq(struct ifnet *, const
 void   if_attachtail(struct ifnet *);
 void   if_attachhead(struct ifnet *);
 void   if_deactivate(struct ifnet *);
+void   if_barrier(struct ifnet *);
 void   if_detach(struct ifnet *);
+void   if_detach2(struct ifnet *);
 void   if_down(struct ifnet *);
 void   if_downall(void);
 void   if_link_state_change(struct ifnet *);

Reply via email to