The branch main has been updated by markj:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3b263fa76c08993f37ce94cb7cd4605f67dd5965

commit 3b263fa76c08993f37ce94cb7cd4605f67dd5965
Author:     Mark Johnston <ma...@freebsd.org>
AuthorDate: 2025-07-28 16:03:20 +0000
Commit:     Mark Johnston <ma...@freebsd.org>
CommitDate: 2025-07-28 16:19:38 +0000

    if_tuntap: Try to fix device refcount bugs
    
    There are two ways to create a tun device, via devfs cloning or with
    ifconfig.  The latter is implemented by tun_clone_create() and the
    former by tunclone(), which invokes tun_clone_create() via
    if_clone_create().  Both of these functions were invoking dev_ref()
    after creating the devfs_node(), but this was extraneous.  tunclone()
    does need to acquire an extra reference since this is required by the
    dev_clone EVENTHANDLER interface contract, but it was already doing so
    by specifying MAKEDEV_REF.  Fix this by removing unnecessary refcount
    acquisitions.
    
    A second problem is with teardown in a VNET jail.  A tun interface
    created by device cloning will hold a credential reference for the jail,
    which prevents it from being destroyed and in particular prevents VNET
    SYSUNINITs from running.  To fix this, we need to register a
    PR_METHOD_REMOVE callback for jail teardown which, in a VNET jail,
    destroys cloned interfaces.  This way, jail teardown can proceed.
    
    These bugs are noticeable with something like
    
      # jail -c name=test vnet command=ls /dev/tun
      # jls -vd
    
    While here, add some comments and be sure to destroy a nascent mutex and
    condition variable in an error path.
    
    Reviewed by:    kib
    MFC after:      1 month
    Sponsored by:   Stormshield
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D51525
---
 sys/net/if_tuntap.c | 76 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 58 insertions(+), 18 deletions(-)

diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
index 3bab04aa4d38..5e6f65c04b2f 100644
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -74,6 +74,7 @@
 #include <sys/malloc.h>
 #include <sys/random.h>
 #include <sys/ctype.h>
+#include <sys/osd.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
@@ -178,6 +179,7 @@ struct tuntap_softc {
 static struct mtx tunmtx;
 static eventhandler_tag arrival_tag;
 static eventhandler_tag clone_tag;
+static int tuntap_osd_jail_slot;
 static const char tunname[] = "tun";
 static const char tapname[] = "tap";
 static const char vmnetname[] = "vmnet";
@@ -497,6 +499,10 @@ vmnet_clone_match(struct if_clone *ifc, const char *name)
        return (0);
 }
 
+/*
+ * Create a clone via the ifnet cloning mechanism.  Note that this is invoked
+ * indirectly by tunclone() below.
+ */
 static int
 tun_clone_create(struct if_clone *ifc, char *name, size_t len,
     struct ifc_data *ifd, struct ifnet **ifpp)
@@ -532,15 +538,19 @@ tun_clone_create(struct if_clone *ifc, char *name, size_t 
len,
        if (i != 0)
                i = tun_create_device(drv, unit, NULL, &dev, name);
        if (i == 0) {
-               dev_ref(dev);
+               struct tuntap_softc *tp;
+
                tuncreate(dev);
-               struct tuntap_softc *tp = dev->si_drv1;
+               tp = dev->si_drv1;
                *ifpp = tp->tun_ifp;
        }
 
        return (i);
 }
 
+/*
+ * Create a clone via devfs access.
+ */
 static void
 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
     struct cdev **dev)
@@ -595,11 +605,12 @@ tunclone(void *arg, struct ucred *cred, char *name, int 
namelen,
                }
 
                i = tun_create_device(drv, u, cred, dev, name);
-       }
-       if (i == 0) {
+       } else {
+               /* Consumed by the dev_clone invoker. */
                dev_ref(*dev);
-               if_clone_create(name, namelen, NULL);
        }
+       if (i == 0)
+               if_clone_create(name, namelen, NULL);
 out:
        CURVNET_RESTORE();
 }
@@ -669,16 +680,6 @@ vnet_tun_init(const void *unused __unused)
 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
                vnet_tun_init, NULL);
 
-static void
-vnet_tun_uninit(const void *unused __unused)
-{
-
-       for (u_int i = 0; i < NDRV; ++i)
-               if_clone_detach(V_tuntap_driver_cloners[i]);
-}
-VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
-    vnet_tun_uninit, NULL);
-
 static void
 tun_uninit(const void *unused __unused)
 {
@@ -689,6 +690,16 @@ tun_uninit(const void *unused __unused)
        EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
        EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
 
+       CURVNET_SET(vnet0);
+       for (u_int i = 0; i < NDRV; i++) {
+               if_clone_detach(V_tuntap_driver_cloners[i]);
+               V_tuntap_driver_cloners[i] = NULL;
+       }
+       CURVNET_RESTORE();
+
+       if (tuntap_osd_jail_slot != 0)
+               osd_jail_deregister(tuntap_osd_jail_slot);
+
        mtx_lock(&tunmtx);
        while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
                TAILQ_REMOVE(&tunhead, tp, tun_list);
@@ -724,6 +735,30 @@ tuntap_driver_from_ifnet(const struct ifnet *ifp)
        return (NULL);
 }
 
+/*
+ * Remove devices that were created by devfs cloning, as they hold references
+ * which prevent the prison from collapsing, in which state VNET sysuninits 
will
+ * not be invoked.
+ */
+static int
+tuntap_prison_remove(void *obj, void *data __unused)
+{
+#ifdef VIMAGE
+       struct prison *pr;
+
+       pr = obj;
+       if (prison_owns_vnet(pr)) {
+               CURVNET_SET(pr->pr_vnet);
+               for (u_int i = 0; i < NDRV; i++) {
+                       if_clone_detach(V_tuntap_driver_cloners[i]);
+                       V_tuntap_driver_cloners[i] = NULL;
+               }
+               CURVNET_RESTORE();
+       }
+#endif
+       return (0);
+}
+
 static int
 tuntapmodevent(module_t mod, int type, void *data)
 {
@@ -738,8 +773,12 @@ tuntapmodevent(module_t mod, int type, void *data)
                        clone_setup(&drv->clones);
                        drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
                }
+               osd_method_t methods[PR_MAXMETHOD] = {
+                       [PR_METHOD_REMOVE] = tuntap_prison_remove,
+               };
+               tuntap_osd_jail_slot = osd_jail_register(NULL, methods);
                arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
-                  tunrename, 0, 1000);
+                   tunrename, 0, 1000);
                if (arrival_tag == NULL)
                        return (ENOMEM);
                clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
@@ -747,7 +786,7 @@ tuntapmodevent(module_t mod, int type, void *data)
                        return (ENOMEM);
                break;
        case MOD_UNLOAD:
-               /* See tun_uninit, so it's done after the vnet_sysuninit() */
+               /* See tun_uninit(). */
                break;
        default:
                return EOPNOTSUPP;
@@ -798,6 +837,8 @@ tun_create_device(struct tuntap_driver *drv, int unit, 
struct ucred *cr,
        args.mda_si_drv1 = tp;
        error = make_dev_s(&args, dev, "%s", name);
        if (error != 0) {
+               mtx_destroy(&tp->tun_mtx);
+               cv_destroy(&tp->tun_cv);
                free(tp, M_TUN);
                return (error);
        }
@@ -914,7 +955,6 @@ tap_transmit(struct ifnet *ifp, struct mbuf *m)
        return (error);
 }
 
-/* XXX: should return an error code so it can fail. */
 static void
 tuncreate(struct cdev *dev)
 {

Reply via email to