Author: ae
Date: Thu Jun 14 14:53:24 2018
New Revision: 335141
URL: https://svnweb.freebsd.org/changeset/base/335141

Log:
  Convert if_me(4) driver to use encap_lookup_t method and be lockless on
  data path.

Modified:
  head/sys/net/if_me.c

Modified: head/sys/net/if_me.c
==============================================================================
--- head/sys/net/if_me.c        Thu Jun 14 14:53:01 2018        (r335140)
+++ head/sys/net/if_me.c        Thu Jun 14 14:53:24 2018        (r335141)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2014 Andrey V. Elsukov <[email protected]>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <[email protected]>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,22 +28,20 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
-#include <sys/systm.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
@@ -68,8 +66,6 @@ __FBSDID("$FreeBSD$");
 #define        MEMTU                   (1500 - sizeof(struct mobhdr))
 static const char mename[] = "me";
 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
-static VNET_DEFINE(struct mtx, me_mtx);
-#define        V_me_mtx        VNET(me_mtx)
 /* Minimal forwarding header RFC 2004 */
 struct mobhdr {
        uint8_t         mob_proto;      /* protocol */
@@ -82,32 +78,27 @@ struct mobhdr {
 
 struct me_softc {
        struct ifnet            *me_ifp;
-       LIST_ENTRY(me_softc)    me_list;
-       struct rmlock           me_lock;
        u_int                   me_fibnum;
-       const struct encaptab   *me_ecookie;
        struct in_addr          me_src;
        struct in_addr          me_dst;
+
+       CK_LIST_ENTRY(me_softc) chain;
 };
+CK_LIST_HEAD(me_list, me_softc);
 #define        ME2IFP(sc)              ((sc)->me_ifp)
 #define        ME_READY(sc)            ((sc)->me_src.s_addr != 0)
-#define        ME_LOCK_INIT(sc)        rm_init(&(sc)->me_lock, "me softc")
-#define        ME_LOCK_DESTROY(sc)     rm_destroy(&(sc)->me_lock)
-#define        ME_RLOCK_TRACKER        struct rm_priotracker me_tracker
-#define        ME_RLOCK(sc)            rm_rlock(&(sc)->me_lock, &me_tracker)
-#define        ME_RUNLOCK(sc)          rm_runlock(&(sc)->me_lock, &me_tracker)
-#define        ME_RLOCK_ASSERT(sc)     rm_assert(&(sc)->me_lock, RA_RLOCKED)
-#define        ME_WLOCK(sc)            rm_wlock(&(sc)->me_lock)
-#define        ME_WUNLOCK(sc)          rm_wunlock(&(sc)->me_lock)
-#define        ME_WLOCK_ASSERT(sc)     rm_assert(&(sc)->me_lock, RA_WLOCKED)
+#define        ME_RLOCK()              epoch_enter_preempt(net_epoch_preempt)
+#define        ME_RUNLOCK()            epoch_exit_preempt(net_epoch_preempt)
+#define        ME_WAIT()               epoch_wait_preempt(net_epoch_preempt)
 
-#define        ME_LIST_LOCK_INIT(x)    mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
-#define        ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
-#define        ME_LIST_LOCK(x)         mtx_lock(&V_me_mtx)
-#define        ME_LIST_UNLOCK(x)       mtx_unlock(&V_me_mtx)
+#ifndef ME_HASH_SIZE
+#define        ME_HASH_SIZE    (1 << 4)
+#endif
+static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL;
+#define        V_me_hashtbl            VNET(me_hashtbl)
+#define        ME_HASH(src, dst)       (V_me_hashtbl[\
+    me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
 
-static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
-#define        V_me_softc_list VNET(me_softc_list)
 static struct sx me_ioctl_sx;
 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
 
@@ -123,21 +114,9 @@ static int me_output(struct ifnet *, struct mbuf *,
                    const struct sockaddr *, struct route *);
 static int     me_input(struct mbuf *, int, int, void *);
 
-static int     me_set_tunnel(struct ifnet *, struct sockaddr_in *,
-    struct sockaddr_in *);
-static void    me_delete_tunnel(struct ifnet *);
-static int     me_encapcheck(const struct mbuf *, int, int, void *);
+static int     me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
+static void    me_delete_tunnel(struct me_softc *);
 
-#define        ME_MINLEN       (sizeof(struct ip) + sizeof(struct mobhdr) -\
-    sizeof(in_addr_t))
-static const struct encap_config ipv4_encap_cfg = {
-       .proto = IPPROTO_MOBILE,
-       .min_length = ME_MINLEN,
-       .exact_match = (sizeof(in_addr_t) << 4) + 8,
-       .check = me_encapcheck,
-       .input = me_input
-};
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
     "Minimal Encapsulation for IP (RFC 2004)");
@@ -150,11 +129,32 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
 
+static uint32_t
+me_hashval(in_addr_t src, in_addr_t dst)
+{
+       uint32_t ret;
+
+       ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+       return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static struct me_list *
+me_hashinit(void)
+{
+       struct me_list *hash;
+       int i;
+
+       hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
+           M_IFME, M_WAITOK);
+       for (i = 0; i < ME_HASH_SIZE; i++)
+               CK_LIST_INIT(&hash[i]);
+
+       return (hash);
+}
+
 static void
 vnet_me_init(const void *unused __unused)
 {
-       LIST_INIT(&V_me_softc_list);
-       ME_LIST_LOCK_INIT();
        V_me_cloner = if_clone_simple(mename, me_clone_create,
            me_clone_destroy, 0);
 }
@@ -165,8 +165,10 @@ static void
 vnet_me_uninit(const void *unused __unused)
 {
 
        if_clone_detach(V_me_cloner);
-       ME_LIST_LOCK_DESTROY();
+       /* Clones are gone now; nothing can unlink from the table anymore. */
+       if (V_me_hashtbl != NULL)
+               free(V_me_hashtbl, M_IFME);
 }
 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_uninit, NULL);
@@ -179,7 +180,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
        sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
        sc->me_fibnum = curthread->td_proc->p_fibnum;
        ME2IFP(sc) = if_alloc(IFT_TUNNEL);
-       ME_LOCK_INIT(sc);
        ME2IFP(sc)->if_softc = sc;
        if_initname(ME2IFP(sc), mename, unit);
 
@@ -193,9 +193,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
        ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
        if_attach(ME2IFP(sc));
        bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
-       ME_LIST_LOCK();
-       LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
-       ME_LIST_UNLOCK();
        return (0);
 }
 
@@ -206,24 +203,20 @@ me_clone_destroy(struct ifnet *ifp)
 
        sx_xlock(&me_ioctl_sx);
        sc = ifp->if_softc;
-       me_delete_tunnel(ifp);
-       ME_LIST_LOCK();
-       LIST_REMOVE(sc, me_list);
-       ME_LIST_UNLOCK();
+       me_delete_tunnel(sc);
        bpfdetach(ifp);
        if_detach(ifp);
        ifp->if_softc = NULL;
        sx_xunlock(&me_ioctl_sx);
 
+       ME_WAIT();
        if_free(ifp);
-       ME_LOCK_DESTROY(sc);
        free(sc, M_IFME);
 }
 
 static int
 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-       ME_RLOCK_TRACKER;
        struct ifreq *ifr = (struct ifreq *)data;
        struct sockaddr_in *src, *dst;
        struct me_softc *sc;
@@ -251,10 +244,8 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        error = 0;
        switch (cmd) {
        case SIOCSIFPHYADDR:
-               src = (struct sockaddr_in *)
-                       &(((struct in_aliasreq *)data)->ifra_addr);
-               dst = (struct sockaddr_in *)
-                       &(((struct in_aliasreq *)data)->ifra_dstaddr);
+               src = &((struct in_aliasreq *)data)->ifra_addr;
+               dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
                if (src->sin_family != dst->sin_family ||
                    src->sin_family != AF_INET ||
                    src->sin_len != dst->sin_len ||
@@ -267,17 +258,16 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        error = EADDRNOTAVAIL;
                        break;
                }
-               error = me_set_tunnel(ifp, src, dst);
+               error = me_set_tunnel(sc, src->sin_addr.s_addr,
+                   dst->sin_addr.s_addr);
                break;
        case SIOCDIFPHYADDR:
-               me_delete_tunnel(ifp);
+               me_delete_tunnel(sc);
                break;
        case SIOCGIFPSRCADDR:
        case SIOCGIFPDSTADDR:
-               ME_RLOCK(sc);
                if (!ME_READY(sc)) {
                        error = EADDRNOTAVAIL;
-                       ME_RUNLOCK(sc);
                        break;
                }
                src = (struct sockaddr_in *)&ifr->ifr_addr;
@@ -292,7 +282,6 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                        src->sin_addr = sc->me_dst;
                        break;
                }
-               ME_RUNLOCK(sc);
                error = prison_if(curthread->td_ucred, sintosa(src));
                if (error != 0)
                        memset(src, 0, sizeof(*src));
@@ -318,81 +307,74 @@ end:
 }
 
 static int
-me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+me_lookup(const struct mbuf *m, int off, int proto, void **arg)
 {
-       ME_RLOCK_TRACKER;
+       const struct ip *ip;
        struct me_softc *sc;
-       struct ip *ip;
-       int ret;
 
-       sc = (struct me_softc *)arg;
-       if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
-               return (0);
-
-       M_ASSERTPKTHDR(m);
-
-       ret = 0;
-       ME_RLOCK(sc);
-       if (ME_READY(sc)) {
-               ip = mtod(m, struct ip *);
+       MPASS(in_epoch());
+       /* The table is allocated lazily by the first me_set_tunnel(). */
+       if (V_me_hashtbl == NULL)
+               return (0);
+       ip = mtod(m, const struct ip *);
+       CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
+           ip->ip_src.s_addr), chain) {
                if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
-                   sc->me_dst.s_addr == ip->ip_src.s_addr)
-                       ret = 32 * 2 + 8;
+                   sc->me_dst.s_addr == ip->ip_src.s_addr) {
+                       if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+                               return (0);
+                       *arg = sc;
+                       return (ENCAP_DRV_LOOKUP);
+               }
        }
-       ME_RUNLOCK(sc);
-       return (ret);
+       return (0);
 }
 
 static int
-me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
-    struct sockaddr_in *dst)
+me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
 {
-       struct me_softc *sc, *tsc;
+       struct me_softc *tmp;
 
        sx_assert(&me_ioctl_sx, SA_XLOCKED);
-       ME_LIST_LOCK();
-       sc = ifp->if_softc;
-       LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
-               if (tsc == sc || !ME_READY(tsc))
+
+       if (V_me_hashtbl == NULL)
+               V_me_hashtbl = me_hashinit();
+
+       if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
+               return (0);
+
+       CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
+               if (tmp == sc)
                        continue;
-               if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
-                   tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
-                       ME_LIST_UNLOCK();
+               if (tmp->me_src.s_addr == src &&
+                   tmp->me_dst.s_addr == dst)
                        return (EADDRNOTAVAIL);
-               }
        }
-       ME_LIST_UNLOCK();
 
-       ME_WLOCK(sc);
-       sc->me_dst = dst->sin_addr;
-       sc->me_src = src->sin_addr;
-       ME_WUNLOCK(sc);
+       me_delete_tunnel(sc);
+       sc->me_dst.s_addr = dst;
+       sc->me_src.s_addr = src;
+       CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
 
-       if (sc->me_ecookie == NULL)
-               sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
-                   sc, M_WAITOK);
-       if (sc->me_ecookie != NULL) {
-               ifp->if_drv_flags |= IFF_DRV_RUNNING;
-               if_link_state_change(ifp, LINK_STATE_UP);
-       }
+       ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
+       if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
        return (0);
 }
 
 static void
-me_delete_tunnel(struct ifnet *ifp)
+me_delete_tunnel(struct me_softc *sc)
 {
-       struct me_softc *sc = ifp->if_softc;
 
        sx_assert(&me_ioctl_sx, SA_XLOCKED);
-       if (sc->me_ecookie != NULL)
-               ip_encap_detach(sc->me_ecookie);
-       sc->me_ecookie = NULL;
-       ME_WLOCK(sc);
-       sc->me_src.s_addr = 0;
-       sc->me_dst.s_addr = 0;
-       ME_WUNLOCK(sc);
-       ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-       if_link_state_change(ifp, LINK_STATE_DOWN);
+       if (ME_READY(sc)) {
+               CK_LIST_REMOVE(sc, chain);
+               ME_WAIT();
+
+               sc->me_src.s_addr = 0;
+               sc->me_dst.s_addr = 0;
+               ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+               if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
+       }
 }
 
 static uint16_t
@@ -505,58 +484,50 @@ me_check_nesting(struct ifnet *ifp, struct mbuf *m)
 
 static int
 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
-   struct route *ro)
+   struct route *ro __unused)
 {
        uint32_t af;
-       int error;
 
-#ifdef MAC
-       error = mac_ifnet_check_transmit(ifp, m);
-       if (error != 0)
-               goto drop;
-#endif
-       if ((ifp->if_flags & IFF_MONITOR) != 0 ||
-           (ifp->if_flags & IFF_UP) == 0) {
-               error = ENETDOWN;
-               goto drop;
-       }
-
-       error = me_check_nesting(ifp, m);
-       if (error != 0)
-               goto drop;
-
-       m->m_flags &= ~(M_BCAST|M_MCAST);
        if (dst->sa_family == AF_UNSPEC)
                bcopy(dst->sa_data, &af, sizeof(af));
        else
                af = dst->sa_family;
-       if (af != AF_INET) {
-               error = EAFNOSUPPORT;
-               goto drop;
-       }
-       BPF_MTAP2(ifp, &af, sizeof(af), m);
+       m->m_pkthdr.csum_data = af;
        return (ifp->if_transmit(ifp, m));
-drop:
-       m_freem(m);
-       if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
-       return (error);
 }
 
 static int
 me_transmit(struct ifnet *ifp, struct mbuf *m)
 {
-       ME_RLOCK_TRACKER;
        struct mobhdr mh;
        struct me_softc *sc;
        struct ip *ip;
+       uint32_t af;
        int error, hlen, plen;
 
+       ME_RLOCK();
+#ifdef MAC
+       error = mac_ifnet_check_transmit(ifp, m);
+       if (error != 0) {
+               m_freem(m);
+               goto drop;
+       }
+#endif
+       error = ENETDOWN;
        sc = ifp->if_softc;
-       if (sc == NULL) {
-               error = ENETDOWN;
+       if (sc == NULL || !ME_READY(sc) ||
+           (ifp->if_flags & IFF_MONITOR) != 0 ||
+           (ifp->if_flags & IFF_UP) == 0 ||
+           (error = me_check_nesting(ifp, m)) != 0) {
                m_freem(m);
                goto drop;
        }
+       af = m->m_pkthdr.csum_data;
+       if (af != AF_INET) {
+               error = EAFNOSUPPORT;
+               m_freem(m);
+               goto drop;
+       }
        if (m->m_len < sizeof(struct ip))
                m = m_pullup(m, sizeof(struct ip));
        if (m == NULL) {
@@ -573,13 +542,6 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
        mh.mob_proto = ip->ip_p;
        mh.mob_src = ip->ip_src;
        mh.mob_dst = ip->ip_dst;
-       ME_RLOCK(sc);
-       if (!ME_READY(sc)) {
-               ME_RUNLOCK(sc);
-               error = ENETDOWN;
-               m_freem(m);
-               goto drop;
-       }
        if (in_hosteq(sc->me_src, ip->ip_src)) {
                hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
                mh.mob_flags = 0;
@@ -590,8 +552,8 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
        plen = m->m_pkthdr.len;
        ip->ip_src = sc->me_src;
        ip->ip_dst = sc->me_dst;
+       m->m_flags &= ~(M_BCAST|M_MCAST);
        M_SETFIB(m, sc->me_fibnum);
-       ME_RUNLOCK(sc);
        M_PREPEND(m, hlen, M_NOWAIT);
        if (m == NULL) {
                error = ENOBUFS;
@@ -619,6 +581,7 @@ drop:
                if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
                if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
        }
+       ME_RUNLOCK();
        return (error);
 }
 
@@ -628,13 +591,26 @@ me_qflush(struct ifnet *ifp __unused)
 
 }
 
+static const struct encaptab *ecookie = NULL;
+static const struct encap_config me_encap_cfg = {
+       .proto = IPPROTO_MOBILE,
+       .min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
+           sizeof(in_addr_t),
+       .exact_match = ENCAP_DRV_LOOKUP,
+       .lookup = me_lookup,
+       .input = me_input
+};
+
 static int
 memodevent(module_t mod, int type, void *data)
 {
 
        switch (type) {
        case MOD_LOAD:
+               ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
+               break;
        case MOD_UNLOAD:
+               ip_encap_detach(ecookie);
                break;
        default:
                return (EOPNOTSUPP);
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to