if_clone_create() has the races caused by context switch.

---- cut begin ----
if_clone_create(const char *name, int rdomain)
{
        struct if_clone *ifc;
        struct ifnet *ifp;
        int unit, ret;

        ifc = if_clone_lookup(name, &unit); /* [1] */
        if (ifc == NULL)
                return (EINVAL);

        if (ifunit(name) != NULL) /* [2] */
                return (EEXIST);

        ret = (*ifc->ifc_create)(ifc, unit); /* [3] */

        if (ret != 0 || (ifp = ifunit(name)) == NULL) /* [4] */
                return (ret);

        NET_LOCK();
        if_addgroup(ifp, ifc->ifc_name);
        if (rdomain != 0)
                if_setrdomain(ifp, rdomain);
        NET_UNLOCK();

        return (ret);
}

---- cut end ----


The race is:

Thread 1:
1. We pass the string "ifacename0" with contains interface name and
it's unit to if_clone_lookup(). if_clone_lookup() extracts the unit '0'
and return it to us as integer `unit'. this `unit' is our local
variable, nobody knows what we are going to create interface with this
unit.

2. There is no "ifacename0" yet, so ifunit() will return NULL. 

3. We did some checks add call `ifc_create' which will call
pseudo-driver's callback to create instance with passed `unit'. We
initialize ifnet with our softwere context and the `if_xname' set to
"ifacename0".

3.1. We do if_attach() which call NET_LOCK() before we link `ifnet' to
`if_list'. Also if_attach() doesn't check anything so we always attach
passed `ifnet'.

Context was switched to Thread 2:

1. We also passed string "ifacename0" to if_clone_lookup() and received
`unit' with value `0'

2. There is no "ifacename0" yet, so ifunit() will return NULL.

3. All checks done, we call `ifc_create'. We initialize out softwere
context with passed `unit', Who knows do we chech is software context
for `unit' already exists before? In fact we don't :)
3.1 We initialized `ifnet' with the `if_xname' set to "ifacename0" and
we link it `if_list'.

4. This check is passed, `ifc_create' returned `0' and we have
"ifacename0" linked so ifunit() will return `ifp'.

We returned to userland with success and switched to Thread 1:

We continue 3.1. if_attach() continue his work and we have another
`ifnet' with `if_xname' set to "ifacename0" in list.

Now we have inconsistent `if_list'.

4. We do these checks. ifunit() will return pointer to `ifp' with
requested "ifacename0". All ok we return to userland with success.

Diff below fixes this reces.

Each `struct if_clone' has the bitmap where requested `unit' marked as
busy if it was not allocated before. We have new function
if_clone_alloc_unit() for. If `unit' already was allocated
if_clone_alloc_unit() will return EEXIST. Also we have
if_clone_rele_unit() to release `unit'. We do unit allocation before we
do context switch so now we can't allocate the same unit twice or more.

Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- net/if.c    22 Jun 2020 09:45:13 -0000      1.610
+++ net/if.c    26 Jun 2020 13:49:08 -0000
@@ -157,6 +157,8 @@ void        if_linkstate_task(void *);
 
 int    if_clone_list(struct if_clonereq *);
 struct if_clone        *if_clone_lookup(const char *, int *);
+int    if_clone_alloc_unit(struct if_clone *, int);
+void   if_clone_rele_unit(struct if_clone *, int);
 
 int    if_group_egress_build(void);
 
@@ -1244,19 +1246,23 @@ if_clone_create(const char *name, int rd
 {
        struct if_clone *ifc;
        struct ifnet *ifp;
-       int unit, ret;
+       int unit, error;
 
        ifc = if_clone_lookup(name, &unit);
        if (ifc == NULL)
                return (EINVAL);
+       error = if_clone_alloc_unit(ifc, unit);
+       if (error != 0)
+               return (error);
 
-       if (ifunit(name) != NULL)
-               return (EEXIST);
-
-       ret = (*ifc->ifc_create)(ifc, unit);
+       if (ifunit(name) != NULL) {
+               error = (EEXIST);
+               goto rele;
+       }
 
-       if (ret != 0 || (ifp = ifunit(name)) == NULL)
-               return (ret);
+       error = (*ifc->ifc_create)(ifc, unit);
+       if (error != 0 || (ifp = ifunit(name)) == NULL)
+               return (0);
 
        NET_LOCK();
        if_addgroup(ifp, ifc->ifc_name);
@@ -1264,7 +1270,10 @@ if_clone_create(const char *name, int rd
                if_setrdomain(ifp, rdomain);
        NET_UNLOCK();
 
-       return (ret);
+       return (0);
+rele:
+       if_clone_rele_unit(ifc, unit);
+       return (error);
 }
 
 /*
@@ -1275,9 +1284,9 @@ if_clone_destroy(const char *name)
 {
        struct if_clone *ifc;
        struct ifnet *ifp;
-       int ret;
+       int ret, unit;
 
-       ifc = if_clone_lookup(name, NULL);
+       ifc = if_clone_lookup(name, &unit);
        if (ifc == NULL)
                return (EINVAL);
 
@@ -1297,6 +1306,7 @@ if_clone_destroy(const char *name)
        }
        NET_UNLOCK();
        ret = (*ifc->ifc_destroy)(ifp);
+       if_clone_rele_unit(ifc, unit);
 
        return (ret);
 }
@@ -1342,12 +1352,96 @@ if_clone_lookup(const char *name, int *u
                unit = (unit * 10) + (*cp++ - '0');
        }
 
-       if (unitp != NULL)
-               *unitp = unit;
+       *unitp = unit;
        return (ifc);
 }
 
 /*
+ * Allocate unit for cloned network interface.
+ */
+int if_clone_alloc_unit(struct if_clone *ifc, int unit)
+{
+       int word, bit, ret;
+
+       word = unit / (sizeof(*ifc->ifc_map) * 8);
+       bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+       rw_enter_write(&ifc->ifc_lock);
+
+       if(word >= ifc->ifc_map_size) {
+               u_long *map;
+               int size;
+
+               size = word + 1;
+               map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK |
+                   M_ZERO);
+
+               if (ifc->ifc_map != NULL) {
+                       memcpy(map, ifc->ifc_map, ifc->ifc_map_size);
+                       free(ifc->ifc_map, M_TEMP,
+                           ifc->ifc_map_size * sizeof(*map));
+               }
+
+               ifc->ifc_map = map;
+               ifc->ifc_map_size = size;
+       }
+
+       if (ifc->ifc_map[word] & (1UL << bit))
+               ret = EEXIST;
+       else {
+               ifc->ifc_map[word] |= (1UL << bit);
+               ret = 0;
+       }
+
+       rw_exit_write(&ifc->ifc_lock);
+
+       return ret;
+}
+
+/*
+ * Release allocated unit for cloned network interface.
+ */
+void if_clone_rele_unit(struct if_clone *ifc, int unit)
+{
+       int word, bit;
+
+       word = unit / (sizeof(*ifc->ifc_map) * 8);
+       bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+       rw_enter_write(&ifc->ifc_lock);
+       KASSERT(word < ifc->ifc_map_size);
+
+       ifc->ifc_map[word] &= ~(1UL << bit);
+
+       if (ifc->ifc_map[word] == 0) {
+               u_long *map;
+               int size;
+
+               size = ifc->ifc_map_size - 2;
+               while (size>=0) {
+                       if (ifc->ifc_map[size] != 0)
+                               break;
+                       --size;
+               }
+               if (size<0) {
+                       size = 0;
+                       map = NULL;
+               } else {
+                       size += 1;
+                       map = mallocarray(size, sizeof(*map), M_TEMP,
+                           M_WAITOK);
+                       memcpy(map, ifc->ifc_map, size);
+                       free(ifc->ifc_map, M_TEMP,
+                           ifc->ifc_map_size * sizeof(*map));
+               }
+
+               ifc->ifc_map = map;
+               ifc->ifc_map_size = size;
+       }
+       rw_exit_write(&ifc->ifc_lock);
+}
+
+/*
  * Register a network interface cloner.
  */
 void
@@ -1360,6 +1454,7 @@ if_clone_attach(struct if_clone *ifc)
         * initialization, the if_cloners becomes immutable.
         */
        KASSERT(pdevinit_done == 0);
+       rw_init(&ifc->ifc_lock, "icflck");
        LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
        if_cloners_count++;
 }
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.105
diff -u -p -r1.105 if_var.h
--- net/if_var.h        12 May 2020 08:49:54 -0000      1.105
+++ net/if_var.h        26 Jun 2020 13:49:08 -0000
@@ -45,6 +45,7 @@
 #include <sys/task.h>
 #include <sys/time.h>
 #include <sys/timeout.h>
+#include <sys/rwlock.h>
 
 #include <net/ifq.h>
 
@@ -86,6 +87,10 @@ struct if_clone {
        const char              *ifc_name;      /* name of device, e.g. `gif' */
        size_t                   ifc_namelen;   /* length of name */
 
+       struct rwlock           ifc_lock;       /* lock for map */
+       u_long                  *ifc_map;       /* units map */
+       int                     ifc_map_size;   /* units map size */
+
        int                     (*ifc_create)(struct if_clone *, int);
        int                     (*ifc_destroy)(struct ifnet *);
 };
@@ -95,6 +100,8 @@ struct if_clone {
   .ifc_list    = { NULL, NULL },                                       \
   .ifc_name    = name,                                                 \
   .ifc_namelen = sizeof(name) - 1,                                     \
+  .ifc_map     = NULL,                                                 \
+  .ifc_map_size        = 0,                                                    
\
   .ifc_create  = create,                                               \
   .ifc_destroy = destroy,                                              \
 }

Reply via email to