Date: Thu, 19 Jan 2017 17:58:17 +0900 From: Kengo NAKAHARA <k-nakah...@iij.ad.jp>
My co-workers implemented an L2TPv3 (RFC 3931) interface for an old version of NetBSD. And then, I ported the interface to NetBSD-current and MP-ified it. Here is the patch. http://netbsd.org/~knakahara/if-l2tp/if-l2tp.patch Cool! A few little comments: diff --git a/sys/net/if.c b/sys/net/if.c index 2386af3..ba63266 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1599,7 +1613,7 @@ if_clone_lookup(const char *name, int *unitp) strcpy(ifname, "if_"); /* separate interface name from unit */ for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ && - *cp && (*cp < '0' || *cp > '9');) + *cp && !if_is_unit(cp);) *dp++ = *cp++; This changes the generic syntax of interface names, perhaps to allow the `2' in `l2tp', although since this loop skips over the first three octets that doesn't seem to be necessary. Either way, I don't have a problem with this, but it should be done in a separate change. diff --git a/sys/net/if_l2tp.c b/sys/net/if_l2tp.c new file mode 100644 index 0000000..dda8bbd --- /dev/null +++ b/sys/net/if_l2tp.c @@ -0,0 +1,1541 @@ [...] +/* + * l2tp global variable definitions + */ +LIST_HEAD(l2tp_sclist, l2tp_softc); +static struct l2tp_sclist l2tp_softc_list; +kmutex_t l2tp_list_lock; + +#if !defined(L2TP_ID_HASH_SIZE) +#define L2TP_ID_HASH_SIZE 64 +#endif +static u_long l2tp_id_hash_mask; + +kmutex_t l2tp_hash_lock; +static struct pslist_head *l2tp_hashed_list = NULL; Consider putting related global state into cacheline-aligned structs? static struct { kmutex_t lock; struct l2tp_sclist list; } l2tp_softc __cacheline_aligned; static struct { kmutex_t lock; struct pslist_head *list; unsigned long mask; } l2tp_hash __cacheline_aligned; +pserialize_t l2tp_psz; +struct psref_class *lv_psref_class __read_mostly; __read_mostly for l2tp_psz? +static int +l2tpdetach(void) +{ + int error; + + if (!LIST_EMPTY(&l2tp_softc_list)) + return EBUSY; Need lock here? Need to first set flag preventing new creation? 
mutex_enter(&l2tp_softc.lock); KASSERT(!l2tp_softc.dying); l2tp_softc.detaching = true; if (!LIST_EMPTY(&l2tp_softc.list)) { l2tp_softc.detaching = false; mutex_exit(&l2tp_softc.lock); return EBUSY; } mutex_exit(&l2tp_softc.lock); Anyone trying to add to l2tp_softc.list must also check l2tp_softc.detaching before proceeding. +static int +l2tp_clone_destroy(struct ifnet *ifp) +{ + struct l2tp_softc *sc = (void *) ifp; Use container_of here: struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc, l2tp_ec.ec_if); No functional difference, but the compiler type-checks it. +static int +l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + const struct rtentry *rt) +{ + struct l2tp_softc *sc = (struct l2tp_softc*)ifp; container_of +void +l2tp_input(struct mbuf *m, struct ifnet *ifp) +{ + + KASSERT(ifp != NULL); + + if (0 == (mtod(m, u_long) & 0x03)) { + /* copy and align head of payload */ + struct mbuf *m_head; + int copy_length; + +#define L2TP_COPY_LENGTH 60 +#define L2TP_LINK_HDR_ROOM (MHLEN - L2TP_COPY_LENGTH - 4/*round4(2)*/) + + if (m->m_pkthdr.len < L2TP_COPY_LENGTH) { + copy_length = m->m_pkthdr.len; + } else { + copy_length = L2TP_COPY_LENGTH; + } + + if (m->m_len < copy_length) { + m = m_pullup(m, copy_length); + if (m == NULL) + return; + } + + MGETHDR(m_head, M_DONTWAIT, MT_HEADER); + if (m_head == NULL) { + m_freem(m); + return; + } + M_COPY_PKTHDR(m_head, m); + + m_head->m_data += 2 /* align */ + L2TP_LINK_HDR_ROOM; + memcpy(m_head->m_data, m->m_data, copy_length); + m_head->m_len = copy_length; + m->m_data += copy_length; + m->m_len -= copy_length; + + /* construct chain */ + if (m->m_len == 0) { + m_head->m_next = m_free(m); /* not m_freem */ + } else { + /* + * copyed mtag in previous call M_COPY_PKTHDR + * but don't delete mtag in case cutt of M_PKTHDR flag + */ + m_tag_delete_chain(m, NULL); + m->m_flags &= ~M_PKTHDR; + m_head->m_next = m; + } + + /* override m */ + m = m_head; + } Someone more familiar with the mbuf API 
than I should review this mbuf juggling show! +/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +int +l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data) +{ + struct l2tp_softc *sc = (struct l2tp_softc*)ifp; container_of + case SIOCSIFMTU: + error = kauth_authorize_generic(kauth_cred_get(), + KAUTH_GENERIC_ISSUSER, NULL); Why the kauth check here and not in any other drivers? Is this kauth check unnecessary, or does its absence in other drivers indicate a bug? Likewise in a few other places below. + if (error) + break; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); Consider using one more local variable instead of multiple levels of nesting? case SIOCSIFPHYADDR: { struct in_aliasreq *aliasreq = data; src = (struct sockaddr *)&aliasreq->ifra_data; dst = (struct sockaddr *)&aliasreq->ifra_dstaddr; ... } Likewise in a few other places below. +static int +l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) +{ + struct l2tp_softc *sc = (struct l2tp_softc *)ifp; container_of + error = encap_lock_enter(); + if (error) + goto error; + + mutex_enter(&sc->l2tp_lock); Document lock order of encap_lock ---> struct l2tp_softc::l2tp_lock? + ovar = sc->l2tp_var; + osrc = ovar->lv_psrc; + odst = ovar->lv_pdst; + memcpy(nvar, ovar, sizeof(*nvar)); You can just do *nvar = *ovar; here, since they are both guaranteed to be aligned. +static int id_hash_func(uint32_t id) +{ + uint32_t hash; + + hash = (id >> 16) ^ id; + hash = (hash >> 4) ^ hash; + + return hash & l2tp_id_hash_mask; +} Is this hash function an essential part of the l2tp protocol, or is it just something that will more likely involve all the bits of id when masking with l2tp_id_hash_mask? 
(Asking so I can know whether it is safe to replace by, e.g., siphash later, once I get around to adding the siphash code I've been sitting on for about five years now.) +/* + * l2tp_variant update API. + * + * Assumption: + * reader side dereferences sc->l2tp_var in reader critical section only, + * that is, all of reader sides do not reader the sc->l2tp_var after + * pserialize_perform(). + */ +static void +l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar) +{ + struct ifnet *ifp = &sc->l2tp_ec.ec_if; + struct l2tp_variant *ovar = sc->l2tp_var; + + KASSERT(mutex_owned(&sc->l2tp_lock)); + + membar_producer(); + atomic_swap_ptr(&sc->l2tp_var, nvar); + pserialize_perform(l2tp_psz); + psref_target_destroy(&ovar->lv_psref, lv_psref_class); No need for atomic_swap_ptr. Just sc->l2tp_var = nvar; is enough. Nobody else can write to it because we hold the lock. diff --git a/sys/net/if_l2tp.h b/sys/net/if_l2tp.h new file mode 100644 index 0000000..1aae23c --- /dev/null +++ b/sys/net/if_l2tp.h @@ -0,0 +1,206 @@ [...] +#include <net/if_ether.h> +#include <netinet/in.h> +/* xxx sigh, why route have struct route instead of pointer? */ Unclear what this comment refers to? + +#define SIOCSL2TPSESSION _IOW('i', 151, struct ifreq) +#define SIOCDL2TPSESSION _IOW('i', 152, struct ifreq) +#define SIOCSL2TPCOOKIE _IOW('i', 153, struct ifreq) +#define SIOCDL2TPCOOKIE _IOW('i', 154, struct ifreq) +#define SIOCSL2TPSTATE _IOW('i', 155, struct ifreq) +#define SIOCGL2TP SIOCGIFGENERIC Pick tabs or spaces and be consistent? (Makes diffs look nicer. Usual rule is `#define<TAB>xyz<TAB>'.) Say struct l2tp_req, not struct ifreq, if that's what you mean? +struct l2tp_req { + int state; + int my_cookie_len; + int peer_cookie_len; Pick a fixed-width unsigned integer type for this unless you actually need negative values? +#ifdef _KERNEL +extern struct psref_class *lv_psref_class __read_mostly; The __read_mostly attribute matters only for definitions, I believe. 
+struct l2tp_softc { + struct ethercom l2tp_ec; /* common area - must be at the top */ + /* to use ether_input(), we must have this */ + percpu_t *l2tp_ro_percpu; Mark this with what the type of the per-CPU object is. For example, percpu_t *l2tp_ro_percpu; /* struct l2tp_ro */ (Obviously this is not as good for type checking as percpu<l2tp_ro> in C++ or similar, but it's better than nothing for the reader's sake.) +static inline bool +l2tp_heldref_variant(struct l2tp_variant *var) +{ + + if (var == NULL) + return false; + return psref_held(&var->lv_psref, lv_psref_class); +} Both users of this first do KASSERT(var != NULL), so there's no need for the conditional `if (var == NULL)' here. +/* Prototypes */ +void l2tpattach(int); +void l2tpattach0(struct l2tp_softc *); +void l2tp_input(struct mbuf *, struct ifnet *); +int l2tp_ioctl(struct ifnet *, u_long, void *); + +struct l2tp_variant* l2tp_lookup_session_ref(uint32_t, struct psref *); KNF: struct l2tp_variant *l2tp_lookup_session_ref(uint32_t, struct psref *); +/* + * Locking notes: + * + l2tp_softc_list is protected by l2tp_list_lock (an adaptive mutex) + * l2tp_softc_list is list of all l2tp_softcs, and it is used to avoid + * wrong unload. Instead of `wrong unload', maybe `unload while busy' or something? + * + l2tp_hashed_list is protected by + * - l2tp_hash_lock (an adaptive mutex) for writer + * - pserialize for reader + * l2tp_hashed_list is hashed list of all l2tp_softcs, and it is used by + * input processing to find appropriate softc. + * + l2tp_softc->l2tp_var is protected by + * - l2tp_softc->l2tp_lock (an adaptive mutex) for writer + * - l2tp_var->lv_psref for reader + * l2tp_softc->l2tp_var is used for variant values while the l2tp tunnel + * exists. This looks great! Can you also state any lock order constraints here? If the only constraint is that no pair of these locks is ever held simultaneously, so be it -- say that too. It looks like encap_lock needs to be mentioned, though. 
diff --git a/sys/netinet/in_l2tp.c b/sys/netinet/in_l2tp.c new file mode 100644 index 0000000..9b2ccd6 --- /dev/null +++ b/sys/netinet/in_l2tp.c @@ -0,0 +1,417 @@ [...] +int +in_l2tp_output(struct l2tp_variant *var, struct mbuf *m) +{ [...] + bzero(&iphdr, sizeof(iphdr)); Use memset, not bzero. + if (var->lv_peer_cookie_len == 4) { + cookie_32 = htonl((uint32_t)var->lv_peer_cookie); + memcpy(mtod(m, uint32_t *), &cookie_32, + sizeof(uint32_t)); I have the impression that mtod(m, T *) is supposed to be used only when m is actually aligned for a T. Most uses of memcpy(mtod(m, T *), ...) use void or uint8_t: memcpy(mtod(m, void *), &cookie_32, sizeof(uint32_t)); I would suggest doing that, in case anyone ever makes mtod check alignment -- unless you can guarantee alignment, in which case you can just do *mtod(m, uint32_t *) = cookie_32; + error = ip_output(m, NULL, &lro->lr_ro, 0, NULL, NULL); + mutex_exit(&lro->lr_lock); + percpu_putref(sc->l2tp_ro_percpu); Hope it's safe to call ip_output with this lock held! Is it easy to prove that ip_output can only at worst put the mbuf on a queue, or that if it recursively calls in_l2tp_output, the recursion detection will prevent locking against myself? diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 5534847..e318a7b 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -360,6 +360,16 @@ const struct protosw inetsw[] = { .pr_init = carp_init, }, #endif /* NCARP > 0 */ +{ .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, Should this be conditional on NL2TP > 0?