CVS commit: src/sys

Kengo NAKAHARA Mon, 13 Jun 2016 01:34:49 -0700

Module Name:    src
Committed By:   knakahara
Date:           Mon Jun 13 08:34:23 UTC 2016


Modified Files:
        src/sys/netinet: ip_flow.c
        src/sys/netinet6: ip6_flow.c

Log Message:
MP-ify fastforward to support GATEWAY kernel option.

I added an "ipflow_lock" mutex to ip_flow.c and an "ip6flow_lock" mutex to
ip6_flow.c to protect all data in each file. Of course, this is not MP-scalable.
However, it is sufficient as a tentative workaround. We should make it scalable
somehow in the future.

ok by ozaki-r@n.o.


To generate a diff of this commit:
cvs rdiff -u -r1.69 -r1.70 src/sys/netinet/ip_flow.c
cvs rdiff -u -r1.24 -r1.25 src/sys/netinet6/ip6_flow.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netinet/ip_flow.c
diff -u src/sys/netinet/ip_flow.c:1.69 src/sys/netinet/ip_flow.c:1.70
--- src/sys/netinet/ip_flow.c:1.69	Mon Jun 13 08:29:55 2016
+++ src/sys/netinet/ip_flow.c	Mon Jun 13 08:34:23 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $	*/
+/*	$NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -72,6 +72,14 @@ LIST_HEAD(ipflowhead, ipflow);
 #define	IPFLOW_TIMER		(5 * PR_SLOWHZ)
 #define	IPFLOW_DEFAULT_HASHSIZE	(1 << IPFLOW_HASHBITS)
 
+/*
+ * ip_flow.c internal lock.
+ * If we use softnet_lock, it would cause recursive lock.
+ *
+ * This is a tentative workaround.
+ * We should make it scalable somehow in the future.
+ */
+static kmutex_t ipflow_lock;
 static struct ipflowhead *ipflowtable = NULL;
 static struct ipflowhead ipflowlist;
 static int ipflow_inuse;
@@ -117,6 +125,8 @@ ipflow_lookup(const struct ip *ip)
 	size_t hash;
 	struct ipflow *ipf;
 
+	KASSERT(mutex_owned(&ipflow_lock));
+
 	hash = ipflow_hash(ip);
 
 	LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) {
@@ -142,6 +152,8 @@ ipflow_reinit(int table_size)
 	struct ipflowhead *new_table;
 	size_t i;
 
+	KASSERT(mutex_owned(&ipflow_lock));
+
 	new_table = (struct ipflowhead *)malloc(sizeof(struct ipflowhead) *
 	    table_size, M_RTABLE, M_NOWAIT);
 
@@ -164,7 +176,12 @@ ipflow_reinit(int table_size)
 void
 ipflow_init(void)
 {
+
+	mutex_init(&ipflow_lock, MUTEX_DEFAULT, IPL_NONE);
+
+	mutex_enter(&ipflow_lock);
 	(void)ipflow_reinit(ip_hashsize);
+	mutex_exit(&ipflow_lock);
 	ipflow_sysctl_init(NULL);
 }
 
@@ -180,19 +197,21 @@ ipflow_fastforward(struct mbuf *m)
 	int iplen;
 	struct ifnet *ifp;
 	int s;
+	int ret = 0;
 
+	mutex_enter(&ipflow_lock);
 	/*
 	 * Are we forwarding packets?  Big enough for an IP packet?
 	 */
 	if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip))
-		return 0;
+		goto out;
 
 	/*
 	 * Was packet received as a link-level multicast or broadcast?
 	 * If so, don't try to fast forward..
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
-		return 0;
+		goto out;
 
 	/*
 	 * IP header with no option and valid version and length
@@ -206,12 +225,12 @@ ipflow_fastforward(struct mbuf *m)
 	iplen = ntohs(ip->ip_len);
 	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) ||
 	    iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len)
-		return 0;
+		goto out;
 	/*
 	 * Find a flow.
 	 */
 	if ((ipf = ipflow_lookup(ip)) == NULL)
-		return 0;
+		goto out;
 
 	ifp = m_get_rcvif(m, &s);
 	/*
@@ -222,7 +241,7 @@ ipflow_fastforward(struct mbuf *m)
 		 M_CSUM_IPv4_BAD)) {
 	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
 		m_put_rcvif(ifp, &s);
-		return 0;
+		goto out;
 
 	case M_CSUM_IPv4:
 		/* Checksum was okay. */
@@ -232,7 +251,7 @@ ipflow_fastforward(struct mbuf *m)
 		/* Must compute it ourselves. */
 		if (in_cksum(m, sizeof(struct ip)) != 0) {
 			m_put_rcvif(ifp, &s);
-			return 0;
+			goto out;
 		}
 		break;
 	}
@@ -244,13 +263,13 @@ ipflow_fastforward(struct mbuf *m)
 	if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL ||
 	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
 	    (rt->rt_flags & (RTF_BLACKHOLE | RTF_BROADCAST)) != 0)
-		return 0;
+		goto out;
 
 	/*
 	 * Packet size OK?  TTL?
 	 */
 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
-		return 0;
+		goto out;
 
 	/*
 	 * Clear any in-bound checksum flags for this packet.
@@ -312,7 +331,10 @@ ipflow_fastforward(struct mbuf *m)
 			ipf->ipf_errors++;
 	}
 	KERNEL_UNLOCK_ONE(NULL);
-	return 1;
+	ret = 1;
+ out:
+	mutex_exit(&ipflow_lock);
+	return ret;
 }
 
 static void
@@ -336,6 +358,9 @@ static void
 ipflow_free(struct ipflow *ipf)
 {
 	int s;
+
+	KASSERT(mutex_owned(&ipflow_lock));
+
 	/*
 	 * Remove the flow from the hash table (at elevated IPL).
 	 * Once it's off the list, we can deal with it at normal
@@ -353,6 +378,9 @@ ipflow_free(struct ipflow *ipf)
 static struct ipflow *
 ipflow_reap(bool just_one)
 {
+
+	KASSERT(mutex_owned(&ipflow_lock));
+
 	while (just_one || ipflow_inuse > ip_maxflows) {
 		struct ipflow *ipf, *maybe_ipf = NULL;
 		int s;
@@ -405,6 +433,7 @@ ipflow_slowtimo(void)
 	uint64_t *ips;
 
 	mutex_enter(softnet_lock);
+	mutex_enter(&ipflow_lock);
 	KERNEL_LOCK(1, NULL);
 	for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
 		next_ipf = LIST_NEXT(ipf, ipf_list);
@@ -423,6 +452,7 @@ ipflow_slowtimo(void)
 		}
 	}
 	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(&ipflow_lock);
 	mutex_exit(softnet_lock);
 }
 
@@ -434,11 +464,15 @@ ipflow_create(const struct route *ro, st
 	size_t hash;
 	int s;
 
+	mutex_enter(&ipflow_lock);
+
 	/*
 	 * Don't create cache entries for ICMP messages.
 	 */
-	if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP)
+	if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) {
+		mutex_exit(&ipflow_lock);
 		return;
+	}
 
 	KERNEL_LOCK(1, NULL);
 
@@ -487,6 +521,7 @@ ipflow_create(const struct route *ro, st
 
  out:
 	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(&ipflow_lock);
 }
 
 int
@@ -496,6 +531,9 @@ ipflow_invalidate_all(int new_size)
 	int s, error;
 
 	error = 0;
+
+	mutex_enter(&ipflow_lock);
+
 	s = splnet();
 	for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
 		next_ipf = LIST_NEXT(ipf, ipf_list);
@@ -506,6 +544,8 @@ ipflow_invalidate_all(int new_size)
 		error = ipflow_reinit(new_size);
 	splx(s);
 
+	mutex_exit(&ipflow_lock);
+
 	return error;
 }
 
@@ -523,11 +563,13 @@ sysctl_net_inet_ip_maxflows(SYSCTLFN_ARG
 		return (error);
 
 	mutex_enter(softnet_lock);
+	mutex_enter(&ipflow_lock);
 	KERNEL_LOCK(1, NULL);
 
 	ipflow_reap(false);
 
 	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(&ipflow_lock);
 	mutex_exit(softnet_lock);
 
 	return (0);

Index: src/sys/netinet6/ip6_flow.c
diff -u src/sys/netinet6/ip6_flow.c:1.24 src/sys/netinet6/ip6_flow.c:1.25
--- src/sys/netinet6/ip6_flow.c:1.24	Mon Mar 23 18:33:17 2015
+++ src/sys/netinet6/ip6_flow.c	Mon Jun 13 08:34:23 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $	*/
+/*	$NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -88,6 +88,14 @@ LIST_HEAD(ip6flowhead, ip6flow);
 #define	IP6FLOW_TIMER		(5 * PR_SLOWHZ)
 #define	IP6FLOW_DEFAULT_HASHSIZE	(1 << IP6FLOW_HASHBITS) 
 
+/*
+ * ip6_flow.c internal lock.
+ * If we use softnet_lock, it would cause recursive lock.
+ *
+ * This is a tentative workaround.
+ * We should make it scalable somehow in the future.
+ */
+static kmutex_t ip6flow_lock;
 static struct ip6flowhead *ip6flowtable = NULL;
 static struct ip6flowhead ip6flowlist;
 static int ip6flow_inuse;
@@ -149,6 +157,8 @@ ip6flow_lookup(const struct ip6_hdr *ip6
 	size_t hash;
 	struct ip6flow *ip6f;
 
+	KASSERT(mutex_owned(&ip6flow_lock));
+
 	hash = ip6flow_hash(ip6);
 
 	LIST_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) {
@@ -177,12 +187,14 @@ ip6flow_poolinit(void)
  * If a newly sized table cannot be malloc'ed we just continue
  * to use the old one.
  */
-int
-ip6flow_init(int table_size)
+static int
+ip6flow_init_locked(int table_size)
 {
 	struct ip6flowhead *new_table;
 	size_t i;
 
+	KASSERT(mutex_owned(&ip6flow_lock));
+
 	new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) *
 	    table_size, M_RTABLE, M_NOWAIT);
 
@@ -202,6 +214,20 @@ ip6flow_init(int table_size)
 	return 0;
 }
 
+int
+ip6flow_init(int table_size)
+{
+	int ret;
+
+	mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE);
+
+	mutex_enter(&ip6flow_lock);
+	ret = ip6flow_init_locked(table_size);
+	mutex_exit(&ip6flow_lock);
+
+	return ret;
+}
+
 /*
  * IPv6 Fast Forward routine. Attempt to forward the packet -
  * if any problems are found return to the main IPv6 input 
@@ -216,35 +242,38 @@ ip6flow_fastforward(struct mbuf **mp)
 	struct mbuf *m;
 	const struct sockaddr *dst;
 	int error;
+	int ret = 0;
+
+	mutex_enter(&ip6flow_lock);
 
 	/*
 	 * Are we forwarding packets and have flows?
 	 */
 	if (!ip6_forwarding || ip6flow_inuse == 0)
-		return 0;
+		goto out;
 
 	m = *mp;
 	/*
 	 * At least size of IPv6 Header?
 	 */
 	if (m->m_len < sizeof(struct ip6_hdr))
-		return 0;	
+		goto out;
 	/*
 	 * Was packet received as a link-level multicast or broadcast?
 	 * If so, don't try to fast forward.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
-		return 0;
+		goto out;
 
 	if (IP6_HDR_ALIGNED_P(mtod(m, const void *)) == 0) {
 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
 				(max_linkhdr + 3) & ~3)) == NULL) {
-			return 0;
+			goto out;
 		}
 		*mp = m;
 	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
-			return 0;
+			goto out;
 		}
 		*mp = m;
 	}
@@ -253,7 +282,7 @@ ip6flow_fastforward(struct mbuf **mp)
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 		/* Bad version. */
-		return 0;
+		goto out;
 	}
 
 	/*
@@ -261,14 +290,14 @@ ip6flow_fastforward(struct mbuf **mp)
 	 * We just leave this up to ip6_input to deal with. 
 	 */
 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS)
-		return 0;
+		goto out;
 
 	/*
 	 * Attempt to find a flow.
 	 */
 	if ((ip6f = ip6flow_lookup(ip6)) == NULL) {
 		/* No flow found. */
-		return 0;
+		goto out;
 	}
 
 	/*
@@ -277,14 +306,14 @@ ip6flow_fastforward(struct mbuf **mp)
 	if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL ||
 	    (rt->rt_ifp->if_flags & IFF_UP) == 0 ||
 	    (rt->rt_flags & RTF_BLACKHOLE) != 0)
-		return 0;
+		goto out;
 
 	/*
 	 * Packet size greater than MTU?
 	 */
 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
 		/* Return to main IPv6 input function. */
-		return 0;
+		goto out;
 	}
 
 	/*
@@ -293,7 +322,7 @@ ip6flow_fastforward(struct mbuf **mp)
 	m->m_pkthdr.csum_flags = 0;
 
 	if (ip6->ip6_hlim <= IPV6_HLIMDEC)
-		return 0;
+		goto out;
 
 	/* Decrement hop limit (same as TTL) */
 	ip6->ip6_hlim -= IPV6_HLIMDEC;
@@ -315,7 +344,10 @@ ip6flow_fastforward(struct mbuf **mp)
 		ip6f->ip6f_forwarded++;
 	}
 	KERNEL_UNLOCK_ONE(NULL);
-	return 1;
+	ret = 1;
+ out:
+	mutex_exit(&ip6flow_lock);
+	return ret;
 }
 
 /*
@@ -347,6 +379,8 @@ ip6flow_free(struct ip6flow *ip6f)
 {
 	int s;
 
+	KASSERT(mutex_owned(&ip6flow_lock));
+
 	/*
 	 * Remove the flow from the hash table (at elevated IPL).
 	 * Once it's off the list, we can deal with it at normal
@@ -361,13 +395,12 @@ ip6flow_free(struct ip6flow *ip6f)
 	pool_put(&ip6flow_pool, ip6f);
 }
 
-/*
- * Reap one or more flows - ip6flow_reap may remove
- * multiple flows if net.inet6.ip6.maxflows is reduced. 
- */
-struct ip6flow *
-ip6flow_reap(int just_one)
+static struct ip6flow *
+ip6flow_reap_locked(int just_one)
 {
+
+	KASSERT(mutex_owned(&ip6flow_lock));
+
 	while (just_one || ip6flow_inuse > ip6_maxflows) {
 		struct ip6flow *ip6f, *maybe_ip6f = NULL;
 		int s;
@@ -414,12 +447,28 @@ ip6flow_reap(int just_one)
 	return NULL;
 }
 
+/*
+ * Reap one or more flows - ip6flow_reap may remove
+ * multiple flows if net.inet6.ip6.maxflows is reduced. 
+ */
+struct ip6flow *
+ip6flow_reap(int just_one)
+{
+	struct ip6flow *ip6f;
+
+	mutex_enter(&ip6flow_lock);
+	ip6f = ip6flow_reap_locked(just_one);
+	mutex_exit(&ip6flow_lock);
+	return ip6f;
+}
+
 void
 ip6flow_slowtimo(void)
 {
 	struct ip6flow *ip6f, *next_ip6f;
 
 	mutex_enter(softnet_lock);
+	mutex_enter(&ip6flow_lock);
 	KERNEL_LOCK(1, NULL);
 
 	for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
@@ -437,6 +486,7 @@ ip6flow_slowtimo(void)
 	}
 
 	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(&ip6flow_lock);
 	mutex_exit(softnet_lock);
 }
 
@@ -452,6 +502,8 @@ ip6flow_create(const struct route *ro, s
 	size_t hash;
 	int s;
 
+	mutex_enter(&ip6flow_lock);
+
 	ip6 = mtod(m, const struct ip6_hdr *);
 
 	/*
@@ -460,8 +512,10 @@ ip6flow_create(const struct route *ro, s
 	 *
 	 * Don't create a flow for ICMPv6 messages.
 	 */
-	if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP)
+	if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP) {
+		mutex_exit(&ip6flow_lock);
 		return;
+	}
 
 	KERNEL_LOCK(1, NULL);
 
@@ -479,7 +533,7 @@ ip6flow_create(const struct route *ro, s
 	ip6f = ip6flow_lookup(ip6);
 	if (ip6f == NULL) {
 		if (ip6flow_inuse >= ip6_maxflows) {
-			ip6f = ip6flow_reap(1);
+			ip6f = ip6flow_reap_locked(1);
 		} else {
 			ip6f = pool_get(&ip6flow_pool, PR_NOWAIT);
 			if (ip6f == NULL)
@@ -518,6 +572,7 @@ ip6flow_create(const struct route *ro, s
 
  out:
 	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(&ip6flow_lock);
 }
 
 /*
@@ -531,6 +586,9 @@ ip6flow_invalidate_all(int new_size)
 	int s, error;
 
 	error = 0;
+
+	mutex_enter(&ip6flow_lock);
+
 	s = splnet();
 	for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
 		next_ip6f = LIST_NEXT(ip6f, ip6f_list);
@@ -538,8 +596,10 @@ ip6flow_invalidate_all(int new_size)
 	}
 
 	if (new_size) 
-		error = ip6flow_init(new_size);
+		error = ip6flow_init_locked(new_size);
 	splx(s);
 
+	mutex_exit(&ip6flow_lock);
+
 	return error;
 }

CVS commit: src/sys

Reply via email to