Module Name:    src
Committed By:   knakahara
Date:           Wed Dec  7 08:28:46 UTC 2022

Modified Files:
        src/sys/netinet: ip_encap.c ip_encap.h

Log Message:
Implement encap_attach_addr(), which is used by IP-encapsulated tunnels.

Tunnels attached by encap_attach() can process received packets
quickly because the softc is looked up in a radix tree.  However, such
tunnels cannot use a priority function, which selects the tunnel's softc
based not only on source and destination but also on other information.
On the other hand, tunnels attached by encap_attach_func() can use
a priority function.  However, receive processing for such tunnels can
be slow because the softc is found by a linear search (which calls each
priority function).

encap_attach_addr() can be used for tunnels that have a fixed tunnel
source address and tunnel destination address.  Tunnels attached
by encap_attach_addr() are looked up with thmap(9), so receive processing
can be fast.  Moreover, such tunnels can use a priority function.


To generate a diff of this commit:
cvs rdiff -u -r1.75 -r1.76 src/sys/netinet/ip_encap.c
cvs rdiff -u -r1.26 -r1.27 src/sys/netinet/ip_encap.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netinet/ip_encap.c
diff -u src/sys/netinet/ip_encap.c:1.75 src/sys/netinet/ip_encap.c:1.76
--- src/sys/netinet/ip_encap.c:1.75	Wed Dec  7 08:27:03 2022
+++ src/sys/netinet/ip_encap.c	Wed Dec  7 08:28:46 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $	*/
+/*	$NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $	*/
 /*	$KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $	*/
 
 /*
@@ -68,7 +68,7 @@
 #define USE_RADIX
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_mrouting.h"
@@ -89,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v
 #include <sys/condvar.h>
 #include <sys/psref.h>
 #include <sys/pslist.h>
+#include <sys/thmap.h>
 
 #include <net/if.h>
 
@@ -134,6 +135,9 @@ static int mask_matchlen(const struct so
 static int mask_match(const struct encaptab *, const struct sockaddr *,
 		const struct sockaddr *);
 #endif
+static void encap_key_init(struct encap_key *, const struct sockaddr *,
+    const struct sockaddr *);
+static void encap_key_inc(struct encap_key *);
 
 /*
  * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
@@ -159,6 +163,8 @@ struct radix_node_head *encap_head[2];	/
 static bool encap_head_updating = false;
 #endif
 
+static thmap_t *encap_map[2];	/* 0 for AF_INET, 1 for AF_INET6 */
+
 static bool encap_initialized = false;
 /*
  * must be done before other encap interfaces initialization.
@@ -210,6 +216,11 @@ encap_init(void)
 	    sizeof(struct sockaddr_pack) << 3);
 #endif
 #endif
+
+	encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY);
+#ifdef INET6
+	encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY);
+#endif
 }
 
 #ifdef INET
@@ -226,6 +237,8 @@ encap4_lookup(struct mbuf *m, int off, i
 	struct radix_node_head *rnh = encap_rnh(AF_INET);
 	struct radix_node *rn;
 #endif
+	thmap_t *emap = encap_map[0];
+	struct encap_key key;
 
 	KASSERT(m->m_len >= sizeof(*ip));
 
@@ -267,6 +280,51 @@ encap4_lookup(struct mbuf *m, int off, i
 		    mask_matchlen(match->dstmask);
 	}
 #endif
+
+	encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours));
+	while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
+		struct psref elem_psref;
+
+		KASSERT(ep->af == AF_INET);
+
+		if (ep->proto >= 0 && ep->proto != proto) {
+			encap_key_inc(&key);
+			continue;
+		}
+
+		psref_acquire(&elem_psref, &ep->psref,
+		    encaptab.elem_class);
+		if (ep->func) {
+			pserialize_read_exit(s);
+			prio = (*ep->func)(m, off, proto, ep->arg);
+			s = pserialize_read_enter();
+		} else {
+			prio = pack.mine.sin_len + pack.yours.sin_len;
+		}
+
+		if (prio <= 0) {
+			psref_release(&elem_psref, &ep->psref,
+			    encaptab.elem_class);
+			encap_key_inc(&key);
+			continue;
+		}
+		if (prio > matchprio) {
+			/* release last matched ep */
+			if (match != NULL)
+				psref_release(match_psref, &match->psref,
+				    encaptab.elem_class);
+
+			psref_copy(match_psref, &elem_psref,
+			    encaptab.elem_class);
+			matchprio = prio;
+			match = ep;
+		}
+
+		psref_release(&elem_psref, &ep->psref,
+		    encaptab.elem_class);
+		encap_key_inc(&key);
+	}
+
 	PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
 		struct psref elem_psref;
 
@@ -386,6 +444,8 @@ encap6_lookup(struct mbuf *m, int off, i
 	struct radix_node_head *rnh = encap_rnh(AF_INET6);
 	struct radix_node *rn;
 #endif
+	thmap_t *emap = encap_map[1];
+	struct encap_key key;
 
 	KASSERT(m->m_len >= sizeof(*ip6));
 
@@ -427,6 +487,50 @@ encap6_lookup(struct mbuf *m, int off, i
 		    mask_matchlen(match->dstmask);
 	}
 #endif
+
+	encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours));
+	while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
+		struct psref elem_psref;
+
+		KASSERT(ep->af == AF_INET6);
+
+		if (ep->proto >= 0 && ep->proto != proto) {
+			encap_key_inc(&key);
+			continue;
+		}
+
+		psref_acquire(&elem_psref, &ep->psref,
+		    encaptab.elem_class);
+		if (ep->func) {
+			pserialize_read_exit(s);
+			prio = (*ep->func)(m, off, proto, ep->arg);
+			s = pserialize_read_enter();
+		} else {
+			prio = pack.mine.sin6_len + pack.yours.sin6_len;
+		}
+
+		if (prio <= 0) {
+			psref_release(&elem_psref, &ep->psref,
+			    encaptab.elem_class);
+			encap_key_inc(&key);
+			continue;
+		}
+		if (prio > matchprio) {
+			/* release last matched ep */
+			if (match != NULL)
+				psref_release(match_psref, &match->psref,
+				    encaptab.elem_class);
+
+			psref_copy(match_psref, &elem_psref,
+			    encaptab.elem_class);
+			matchprio = prio;
+			match = ep;
+		}
+		psref_release(&elem_psref, &ep->psref,
+		    encaptab.elem_class);
+		encap_key_inc(&key);
+	}
+
 	PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
 		struct psref elem_psref;
 
@@ -799,6 +903,111 @@ gc:
 	return NULL;
 }
 
+static void
+encap_key_init(struct encap_key *key,
+    const struct sockaddr *local, const struct sockaddr *remote)
+{
+
+	memset(key, 0, sizeof(*key));
+
+	sockaddr_copy(&key->local_sa, sizeof(key->local_u), local);
+	sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote);
+}
+
+static void
+encap_key_inc(struct encap_key *key)
+{
+
+	(key->seq)++;
+}
+
+static void
+encap_key_dec(struct encap_key *key)
+{
+
+	(key->seq)--;
+}
+
+static void
+encap_key_copy(struct encap_key *dst, const struct encap_key *src)
+{
+
+	memset(dst, 0, sizeof(*dst));
+	*dst = *src;
+}
+
+/*
+ * src is always my side, and dst is always remote side.
+ * Return value will be necessary as input (cookie) for encap_detach().
+ */
+const struct encaptab *
+encap_attach_addr(int af, int proto,
+    const struct sockaddr *src, const struct sockaddr *dst,
+    encap_priofunc_t *func,
+    const struct encapsw *esw, void *arg)
+{
+	struct encaptab *ep;
+	size_t l;
+	thmap_t *emap;
+	void *retep;
+	struct ip_pack4 *pack4;
+#ifdef INET6
+	struct ip_pack6 *pack6;
+#endif
+
+	ASSERT_SLEEPABLE();
+
+	encap_afcheck(af, src, dst);
+
+	switch (af) {
+	case AF_INET:
+		l = sizeof(*pack4);
+		emap = encap_map[0];
+		break;
+#ifdef INET6
+	case AF_INET6:
+		l = sizeof(*pack6);
+		emap = encap_map[1];
+		break;
+#endif
+	default:
+		return NULL;
+	}
+
+	ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
+	ep->addrpack = kmem_zalloc(l, KM_SLEEP);
+	ep->addrpack->sa_len = l & 0xff;
+	ep->af = af;
+	ep->proto = proto;
+	ep->flag = IP_ENCAP_ADDR_ENABLE;
+	switch (af) {
+	case AF_INET:
+		pack4 = (struct ip_pack4 *)ep->addrpack;
+		ep->src = (struct sockaddr *)&pack4->mine;
+		ep->dst = (struct sockaddr *)&pack4->yours;
+		break;
+#ifdef INET6
+	case AF_INET6:
+		pack6 = (struct ip_pack6 *)ep->addrpack;
+		ep->src = (struct sockaddr *)&pack6->mine;
+		ep->dst = (struct sockaddr *)&pack6->yours;
+		break;
+#endif
+	}
+	memcpy(ep->src, src, src->sa_len);
+	memcpy(ep->dst, dst, dst->sa_len);
+	ep->esw = esw;
+	ep->arg = arg;
+	ep->func = func;
+	psref_target_init(&ep->psref, encaptab.elem_class);
+
+	encap_key_init(&ep->key, src, dst);
+	while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep)
+		encap_key_inc(&ep->key);
+	return ep;
+}
+
+
 /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
 
 #ifdef INET6
@@ -900,6 +1109,62 @@ encap6_ctlinput(int cmd, const struct so
 }
 #endif
 
+static int
+encap_detach_addr(const struct encaptab *ep)
+{
+	thmap_t *emap;
+	struct encaptab *retep;
+	struct encaptab *target;
+	void *thgc;
+	struct encap_key key;
+
+	KASSERT(encap_lock_held());
+	KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE);
+
+	switch (ep->af) {
+	case AF_INET:
+		emap = encap_map[0];
+		break;
+#ifdef INET6
+	case AF_INET6:
+		emap = encap_map[1];
+		break;
+#endif
+	default:
+		return EINVAL;
+	}
+
+	retep = thmap_del(emap, &ep->key, sizeof(ep->key));
+	if (retep != ep) {
+		return ENOENT;
+	}
+	target = retep;
+
+	/*
+	 * To keep continuity, decrement seq after detached encaptab.
+	 */
+	encap_key_copy(&key, &ep->key);
+	encap_key_inc(&key);
+	while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) {
+		void *pp;
+
+		encap_key_dec(&retep->key);
+		pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep);
+		KASSERT(retep == pp);
+
+		encap_key_inc(&key);
+	}
+
+	thgc = thmap_stage_gc(emap);
+	pserialize_perform(encaptab.psz);
+	thmap_gc(emap, thgc);
+	psref_target_destroy(&target->psref, encaptab.elem_class);
+	kmem_free(target->addrpack, target->addrpack->sa_len);
+	kmem_free(target, sizeof(*target));
+
+	return 0;
+}
+
 int
 encap_detach(const struct encaptab *cookie)
 {
@@ -909,6 +1174,9 @@ encap_detach(const struct encaptab *cook
 
 	KASSERT(encap_lock_held());
 
+	if (ep->flag & IP_ENCAP_ADDR_ENABLE)
+		return encap_detach_addr(ep);
+
 	PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
 		if (p == ep) {
 			error = encap_remove(p);

Index: src/sys/netinet/ip_encap.h
diff -u src/sys/netinet/ip_encap.h:1.26 src/sys/netinet/ip_encap.h:1.27
--- src/sys/netinet/ip_encap.h:1.26	Wed Dec  7 08:27:03 2022
+++ src/sys/netinet/ip_encap.h	Wed Dec  7 08:28:46 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_encap.h,v 1.26 2022/12/07 08:27:03 knakahara Exp $	*/
+/*	$NetBSD: ip_encap.h,v 1.27 2022/12/07 08:28:46 knakahara Exp $	*/
 /*	$KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $	*/
 
 /*
@@ -64,6 +64,28 @@ struct encapsw {
 
 typedef	int encap_priofunc_t(struct mbuf *, int, int, void *);
 
+struct encap_key {
+	union  {
+		struct sockaddr		local_u_sa;
+		struct sockaddr_in	local_u_sin;
+		struct sockaddr_in6	local_u_sin6;
+	}	local_u;
+#define local_sa	local_u.local_u_sa
+#define local_sin	local_u.local_u_sin
+#define local_sin6	local_u.local_u_sin6
+
+	union  {
+		struct sockaddr		remote_u_sa;
+		struct sockaddr_in	remote_u_sin;
+		struct sockaddr_in6	remote_u_sin6;
+	}	remote_u;
+#define remote_sa	remote_u.remote_u_sa
+#define remote_sin	remote_u.remote_u_sin
+#define remote_sin6	remote_u.remote_u_sin6
+
+	u_int seq;
+};
+
 struct encaptab {
 	struct radix_node nodes[2];
 	struct pslist_entry chain;
@@ -78,9 +100,13 @@ struct encaptab {
 	encap_priofunc_t *func;
 	const struct encapsw *esw;
 	void *arg;
+	struct encap_key key;
+	u_int flag;
 	struct psref_target	psref;
 };
 
+#define IP_ENCAP_ADDR_ENABLE	__BIT(0)
+
 /* to lookup a pair of address using radix tree */
 struct sockaddr_pack {
 	u_int8_t sp_len;
@@ -110,6 +136,9 @@ const struct encaptab *encap_attach(int,
 const struct encaptab *encap_attach_func(int, int,
 	encap_priofunc_t *,
 	const struct encapsw *, void *);
+const struct encaptab *encap_attach_addr(int, int,
+	const struct sockaddr *, const struct sockaddr *,
+	encap_priofunc_t *, const struct encapsw *, void *);
 void	*encap6_ctlinput(int, const struct sockaddr *, void *);
 int	encap_detach(const struct encaptab *);
 

Reply via email to