Module Name: src Committed By: knakahara Date: Wed Dec 7 08:28:46 UTC 2022
Modified Files: src/sys/netinet: ip_encap.c ip_encap.h Log Message: Implement encap_attach_addr(), which is used by IP-encapsulated tunnels. Tunnels attached by encap_attach() can process received packets quickly because the softc is looked up in a radix tree. However, such tunnels cannot use a priority function, which selects the tunnel's softc based not only on the source and destination addresses but also on other information. On the other hand, tunnels attached by encap_attach_func() can use a priority function. However, their receive processing can be slow because the softc is found by linear search (which calls each tunnel's priority function). encap_attach_addr() can be used for tunnels that have a fixed tunnel source address and tunnel destination address. Tunnels attached by encap_attach_addr() are looked up with thmap(9), so receive processing can be fast. Moreover, these tunnels can use a priority function. To generate a diff of this commit: cvs rdiff -u -r1.75 -r1.76 src/sys/netinet/ip_encap.c cvs rdiff -u -r1.26 -r1.27 src/sys/netinet/ip_encap.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/netinet/ip_encap.c diff -u src/sys/netinet/ip_encap.c:1.75 src/sys/netinet/ip_encap.c:1.76 --- src/sys/netinet/ip_encap.c:1.75 Wed Dec 7 08:27:03 2022 +++ src/sys/netinet/ip_encap.c Wed Dec 7 08:28:46 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $ */ +/* $NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $ */ /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */ /* @@ -68,7 +68,7 @@ #define USE_RADIX #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $"); #ifdef _KERNEL_OPT #include "opt_mrouting.h" @@ -89,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v #include <sys/condvar.h> #include <sys/psref.h> #include <sys/pslist.h> +#include <sys/thmap.h> #include <net/if.h> @@ -134,6 +135,9 @@ static int mask_matchlen(const struct so static int mask_match(const struct encaptab *, const struct sockaddr *, const struct sockaddr *); #endif +static void encap_key_init(struct encap_key *, const struct sockaddr *, + const struct sockaddr *); +static void encap_key_inc(struct encap_key *); /* * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking @@ -159,6 +163,8 @@ struct radix_node_head *encap_head[2]; / static bool encap_head_updating = false; #endif +static thmap_t *encap_map[2]; /* 0 for AF_INET, 1 for AF_INET6 */ + static bool encap_initialized = false; /* * must be done before other encap interfaces initialization. 
@@ -210,6 +216,11 @@ encap_init(void) sizeof(struct sockaddr_pack) << 3); #endif #endif + + encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY); +#ifdef INET6 + encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY); +#endif } #ifdef INET @@ -226,6 +237,8 @@ encap4_lookup(struct mbuf *m, int off, i struct radix_node_head *rnh = encap_rnh(AF_INET); struct radix_node *rn; #endif + thmap_t *emap = encap_map[0]; + struct encap_key key; KASSERT(m->m_len >= sizeof(*ip)); @@ -267,6 +280,51 @@ encap4_lookup(struct mbuf *m, int off, i mask_matchlen(match->dstmask); } #endif + + encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours)); + while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { + struct psref elem_psref; + + KASSERT(ep->af == AF_INET); + + if (ep->proto >= 0 && ep->proto != proto) { + encap_key_inc(&key); + continue; + } + + psref_acquire(&elem_psref, &ep->psref, + encaptab.elem_class); + if (ep->func) { + pserialize_read_exit(s); + prio = (*ep->func)(m, off, proto, ep->arg); + s = pserialize_read_enter(); + } else { + prio = pack.mine.sin_len + pack.yours.sin_len; + } + + if (prio <= 0) { + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); + encap_key_inc(&key); + continue; + } + if (prio > matchprio) { + /* release last matched ep */ + if (match != NULL) + psref_release(match_psref, &match->psref, + encaptab.elem_class); + + psref_copy(match_psref, &elem_psref, + encaptab.elem_class); + matchprio = prio; + match = ep; + } + + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); + encap_key_inc(&key); + } + PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { struct psref elem_psref; @@ -386,6 +444,8 @@ encap6_lookup(struct mbuf *m, int off, i struct radix_node_head *rnh = encap_rnh(AF_INET6); struct radix_node *rn; #endif + thmap_t *emap = encap_map[1]; + struct encap_key key; KASSERT(m->m_len >= sizeof(*ip6)); @@ -427,6 +487,50 @@ encap6_lookup(struct mbuf *m, int off, i mask_matchlen(match->dstmask); } #endif + + 
encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours)); + while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) { + struct psref elem_psref; + + KASSERT(ep->af == AF_INET6); + + if (ep->proto >= 0 && ep->proto != proto) { + encap_key_inc(&key); + continue; + } + + psref_acquire(&elem_psref, &ep->psref, + encaptab.elem_class); + if (ep->func) { + pserialize_read_exit(s); + prio = (*ep->func)(m, off, proto, ep->arg); + s = pserialize_read_enter(); + } else { + prio = pack.mine.sin6_len + pack.yours.sin6_len; + } + + if (prio <= 0) { + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); + encap_key_inc(&key); + continue; + } + if (prio > matchprio) { + /* release last matched ep */ + if (match != NULL) + psref_release(match_psref, &match->psref, + encaptab.elem_class); + + psref_copy(match_psref, &elem_psref, + encaptab.elem_class); + matchprio = prio; + match = ep; + } + psref_release(&elem_psref, &ep->psref, + encaptab.elem_class); + encap_key_inc(&key); + } + PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) { struct psref elem_psref; @@ -799,6 +903,111 @@ gc: return NULL; } +static void +encap_key_init(struct encap_key *key, + const struct sockaddr *local, const struct sockaddr *remote) +{ + + memset(key, 0, sizeof(*key)); + + sockaddr_copy(&key->local_sa, sizeof(key->local_u), local); + sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote); +} + +static void +encap_key_inc(struct encap_key *key) +{ + + (key->seq)++; +} + +static void +encap_key_dec(struct encap_key *key) +{ + + (key->seq)--; +} + +static void +encap_key_copy(struct encap_key *dst, const struct encap_key *src) +{ + + memset(dst, 0, sizeof(*dst)); + *dst = *src; +} + +/* + * src is always my side, and dst is always remote side. + * Return value will be necessary as input (cookie) for encap_detach(). 
+ */ +const struct encaptab * +encap_attach_addr(int af, int proto, + const struct sockaddr *src, const struct sockaddr *dst, + encap_priofunc_t *func, + const struct encapsw *esw, void *arg) +{ + struct encaptab *ep; + size_t l; + thmap_t *emap; + void *retep; + struct ip_pack4 *pack4; +#ifdef INET6 + struct ip_pack6 *pack6; +#endif + + ASSERT_SLEEPABLE(); + + encap_afcheck(af, src, dst); + + switch (af) { + case AF_INET: + l = sizeof(*pack4); + emap = encap_map[0]; + break; +#ifdef INET6 + case AF_INET6: + l = sizeof(*pack6); + emap = encap_map[1]; + break; +#endif + default: + return NULL; + } + + ep = kmem_zalloc(sizeof(*ep), KM_SLEEP); + ep->addrpack = kmem_zalloc(l, KM_SLEEP); + ep->addrpack->sa_len = l & 0xff; + ep->af = af; + ep->proto = proto; + ep->flag = IP_ENCAP_ADDR_ENABLE; + switch (af) { + case AF_INET: + pack4 = (struct ip_pack4 *)ep->addrpack; + ep->src = (struct sockaddr *)&pack4->mine; + ep->dst = (struct sockaddr *)&pack4->yours; + break; +#ifdef INET6 + case AF_INET6: + pack6 = (struct ip_pack6 *)ep->addrpack; + ep->src = (struct sockaddr *)&pack6->mine; + ep->dst = (struct sockaddr *)&pack6->yours; + break; +#endif + } + memcpy(ep->src, src, src->sa_len); + memcpy(ep->dst, dst, dst->sa_len); + ep->esw = esw; + ep->arg = arg; + ep->func = func; + psref_target_init(&ep->psref, encaptab.elem_class); + + encap_key_init(&ep->key, src, dst); + while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep) + encap_key_inc(&ep->key); + return ep; +} + + /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */ #ifdef INET6 @@ -900,6 +1109,62 @@ encap6_ctlinput(int cmd, const struct so } #endif +static int +encap_detach_addr(const struct encaptab *ep) +{ + thmap_t *emap; + struct encaptab *retep; + struct encaptab *target; + void *thgc; + struct encap_key key; + + KASSERT(encap_lock_held()); + KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE); + + switch (ep->af) { + case AF_INET: + emap = encap_map[0]; + break; +#ifdef INET6 + case 
AF_INET6: + emap = encap_map[1]; + break; +#endif + default: + return EINVAL; + } + + retep = thmap_del(emap, &ep->key, sizeof(ep->key)); + if (retep != ep) { + return ENOENT; + } + target = retep; + + /* + * To keep continuity, decrement seq after detached encaptab. + */ + encap_key_copy(&key, &ep->key); + encap_key_inc(&key); + while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) { + void *pp; + + encap_key_dec(&retep->key); + pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep); + KASSERT(retep == pp); + + encap_key_inc(&key); + } + + thgc = thmap_stage_gc(emap); + pserialize_perform(encaptab.psz); + thmap_gc(emap, thgc); + psref_target_destroy(&target->psref, encaptab.elem_class); + kmem_free(target->addrpack, target->addrpack->sa_len); + kmem_free(target, sizeof(*target)); + + return 0; +} + int encap_detach(const struct encaptab *cookie) { @@ -909,6 +1174,9 @@ encap_detach(const struct encaptab *cook KASSERT(encap_lock_held()); + if (ep->flag & IP_ENCAP_ADDR_ENABLE) + return encap_detach_addr(ep); + PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) { if (p == ep) { error = encap_remove(p); Index: src/sys/netinet/ip_encap.h diff -u src/sys/netinet/ip_encap.h:1.26 src/sys/netinet/ip_encap.h:1.27 --- src/sys/netinet/ip_encap.h:1.26 Wed Dec 7 08:27:03 2022 +++ src/sys/netinet/ip_encap.h Wed Dec 7 08:28:46 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_encap.h,v 1.26 2022/12/07 08:27:03 knakahara Exp $ */ +/* $NetBSD: ip_encap.h,v 1.27 2022/12/07 08:28:46 knakahara Exp $ */ /* $KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $ */ /* @@ -64,6 +64,28 @@ struct encapsw { typedef int encap_priofunc_t(struct mbuf *, int, int, void *); +struct encap_key { + union { + struct sockaddr local_u_sa; + struct sockaddr_in local_u_sin; + struct sockaddr_in6 local_u_sin6; + } local_u; +#define local_sa local_u.local_u_sa +#define local_sin local_u.local_u_sin +#define local_sin6 local_u.local_u_sin6 + + union { + struct sockaddr remote_u_sa; + struct 
sockaddr_in remote_u_sin; + struct sockaddr_in6 remote_u_sin6; + } remote_u; +#define remote_sa remote_u.remote_u_sa +#define remote_sin remote_u.remote_u_sin +#define remote_sin6 remote_u.remote_u_sin6 + + u_int seq; +}; + struct encaptab { struct radix_node nodes[2]; struct pslist_entry chain; @@ -78,9 +100,13 @@ struct encaptab { encap_priofunc_t *func; const struct encapsw *esw; void *arg; + struct encap_key key; + u_int flag; struct psref_target psref; }; +#define IP_ENCAP_ADDR_ENABLE __BIT(0) + /* to lookup a pair of address using radix tree */ struct sockaddr_pack { u_int8_t sp_len; @@ -110,6 +136,9 @@ const struct encaptab *encap_attach(int, const struct encaptab *encap_attach_func(int, int, encap_priofunc_t *, const struct encapsw *, void *); +const struct encaptab *encap_attach_addr(int, int, + const struct sockaddr *, const struct sockaddr *, + encap_priofunc_t *, const struct encapsw *, void *); void *encap6_ctlinput(int, const struct sockaddr *, void *); int encap_detach(const struct encaptab *);