Module Name: src Committed By: knakahara Date: Mon Jun 13 08:34:23 UTC 2016
Modified Files: src/sys/netinet: ip_flow.c src/sys/netinet6: ip6_flow.c Log Message: MP-ify fastforward to support the GATEWAY kernel option. Add an "ipflow_lock" mutex in ip_flow.c and an "ip6flow_lock" mutex in ip6_flow.c to protect all data in each file. Of course, this is not MP-scalable. However, it is sufficient as a tentative workaround. We should make it scalable somehow in the future. ok by ozaki-r@n.o. To generate a diff of this commit: cvs rdiff -u -r1.69 -r1.70 src/sys/netinet/ip_flow.c cvs rdiff -u -r1.24 -r1.25 src/sys/netinet6/ip6_flow.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/netinet/ip_flow.c diff -u src/sys/netinet/ip_flow.c:1.69 src/sys/netinet/ip_flow.c:1.70 --- src/sys/netinet/ip_flow.c:1.69 Mon Jun 13 08:29:55 2016 +++ src/sys/netinet/ip_flow.c Mon Jun 13 08:34:23 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $ */ +/* $NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -72,6 +72,14 @@ LIST_HEAD(ipflowhead, ipflow); #define IPFLOW_TIMER (5 * PR_SLOWHZ) #define IPFLOW_DEFAULT_HASHSIZE (1 << IPFLOW_HASHBITS) +/* + * ip_flow.c internal lock. + * If we use softnet_lock, it would cause recursive lock. + * + * This is a tentative workaround. + * We should make it scalable somehow in the future. 
+ */ +static kmutex_t ipflow_lock; static struct ipflowhead *ipflowtable = NULL; static struct ipflowhead ipflowlist; static int ipflow_inuse; @@ -117,6 +125,8 @@ ipflow_lookup(const struct ip *ip) size_t hash; struct ipflow *ipf; + KASSERT(mutex_owned(&ipflow_lock)); + hash = ipflow_hash(ip); LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) { @@ -142,6 +152,8 @@ ipflow_reinit(int table_size) struct ipflowhead *new_table; size_t i; + KASSERT(mutex_owned(&ipflow_lock)); + new_table = (struct ipflowhead *)malloc(sizeof(struct ipflowhead) * table_size, M_RTABLE, M_NOWAIT); @@ -164,7 +176,12 @@ ipflow_reinit(int table_size) void ipflow_init(void) { + + mutex_init(&ipflow_lock, MUTEX_DEFAULT, IPL_NONE); + + mutex_enter(&ipflow_lock); (void)ipflow_reinit(ip_hashsize); + mutex_exit(&ipflow_lock); ipflow_sysctl_init(NULL); } @@ -180,19 +197,21 @@ ipflow_fastforward(struct mbuf *m) int iplen; struct ifnet *ifp; int s; + int ret = 0; + mutex_enter(&ipflow_lock); /* * Are we forwarding packets? Big enough for an IP packet? */ if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip)) - return 0; + goto out; /* * Was packet received as a link-level multicast or broadcast? * If so, don't try to fast forward.. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0) - return 0; + goto out; /* * IP header with no option and valid version and length @@ -206,12 +225,12 @@ ipflow_fastforward(struct mbuf *m) iplen = ntohs(ip->ip_len); if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) || iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len) - return 0; + goto out; /* * Find a flow. */ if ((ipf = ipflow_lookup(ip)) == NULL) - return 0; + goto out; ifp = m_get_rcvif(m, &s); /* @@ -222,7 +241,7 @@ ipflow_fastforward(struct mbuf *m) M_CSUM_IPv4_BAD)) { case M_CSUM_IPv4|M_CSUM_IPv4_BAD: m_put_rcvif(ifp, &s); - return 0; + goto out; case M_CSUM_IPv4: /* Checksum was okay. */ @@ -232,7 +251,7 @@ ipflow_fastforward(struct mbuf *m) /* Must compute it ourselves. 
*/ if (in_cksum(m, sizeof(struct ip)) != 0) { m_put_rcvif(ifp, &s); - return 0; + goto out; } break; } @@ -244,13 +263,13 @@ ipflow_fastforward(struct mbuf *m) if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL || (rt->rt_ifp->if_flags & IFF_UP) == 0 || (rt->rt_flags & (RTF_BLACKHOLE | RTF_BROADCAST)) != 0) - return 0; + goto out; /* * Packet size OK? TTL? */ if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) - return 0; + goto out; /* * Clear any in-bound checksum flags for this packet. @@ -312,7 +331,10 @@ ipflow_fastforward(struct mbuf *m) ipf->ipf_errors++; } KERNEL_UNLOCK_ONE(NULL); - return 1; + ret = 1; + out: + mutex_exit(&ipflow_lock); + return ret; } static void @@ -336,6 +358,9 @@ static void ipflow_free(struct ipflow *ipf) { int s; + + KASSERT(mutex_owned(&ipflow_lock)); + /* * Remove the flow from the hash table (at elevated IPL). * Once it's off the list, we can deal with it at normal @@ -353,6 +378,9 @@ ipflow_free(struct ipflow *ipf) static struct ipflow * ipflow_reap(bool just_one) { + + KASSERT(mutex_owned(&ipflow_lock)); + while (just_one || ipflow_inuse > ip_maxflows) { struct ipflow *ipf, *maybe_ipf = NULL; int s; @@ -405,6 +433,7 @@ ipflow_slowtimo(void) uint64_t *ips; mutex_enter(softnet_lock); + mutex_enter(&ipflow_lock); KERNEL_LOCK(1, NULL); for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { next_ipf = LIST_NEXT(ipf, ipf_list); @@ -423,6 +452,7 @@ ipflow_slowtimo(void) } } KERNEL_UNLOCK_ONE(NULL); + mutex_exit(&ipflow_lock); mutex_exit(softnet_lock); } @@ -434,11 +464,15 @@ ipflow_create(const struct route *ro, st size_t hash; int s; + mutex_enter(&ipflow_lock); + /* * Don't create cache entries for ICMP messages. 
*/ - if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) + if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) { + mutex_exit(&ipflow_lock); return; + } KERNEL_LOCK(1, NULL); @@ -487,6 +521,7 @@ ipflow_create(const struct route *ro, st out: KERNEL_UNLOCK_ONE(NULL); + mutex_exit(&ipflow_lock); } int @@ -496,6 +531,9 @@ ipflow_invalidate_all(int new_size) int s, error; error = 0; + + mutex_enter(&ipflow_lock); + s = splnet(); for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) { next_ipf = LIST_NEXT(ipf, ipf_list); @@ -506,6 +544,8 @@ ipflow_invalidate_all(int new_size) error = ipflow_reinit(new_size); splx(s); + mutex_exit(&ipflow_lock); + return error; } @@ -523,11 +563,13 @@ sysctl_net_inet_ip_maxflows(SYSCTLFN_ARG return (error); mutex_enter(softnet_lock); + mutex_enter(&ipflow_lock); KERNEL_LOCK(1, NULL); ipflow_reap(false); KERNEL_UNLOCK_ONE(NULL); + mutex_exit(&ipflow_lock); mutex_exit(softnet_lock); return (0); Index: src/sys/netinet6/ip6_flow.c diff -u src/sys/netinet6/ip6_flow.c:1.24 src/sys/netinet6/ip6_flow.c:1.25 --- src/sys/netinet6/ip6_flow.c:1.24 Mon Mar 23 18:33:17 2015 +++ src/sys/netinet6/ip6_flow.c Mon Jun 13 08:34:23 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $ */ +/* $NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -88,6 +88,14 @@ LIST_HEAD(ip6flowhead, ip6flow); #define IP6FLOW_TIMER (5 * PR_SLOWHZ) #define IP6FLOW_DEFAULT_HASHSIZE (1 << IP6FLOW_HASHBITS) +/* + * ip6_flow.c internal lock. + * If we use softnet_lock, it would cause recursive lock. + * + * This is a tentative workaround. + * We should make it scalable somehow in the future. 
+ */ +static kmutex_t ip6flow_lock; static struct ip6flowhead *ip6flowtable = NULL; static struct ip6flowhead ip6flowlist; static int ip6flow_inuse; @@ -149,6 +157,8 @@ ip6flow_lookup(const struct ip6_hdr *ip6 size_t hash; struct ip6flow *ip6f; + KASSERT(mutex_owned(&ip6flow_lock)); + hash = ip6flow_hash(ip6); LIST_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) { @@ -177,12 +187,14 @@ ip6flow_poolinit(void) * If a newly sized table cannot be malloc'ed we just continue * to use the old one. */ -int -ip6flow_init(int table_size) +static int +ip6flow_init_locked(int table_size) { struct ip6flowhead *new_table; size_t i; + KASSERT(mutex_owned(&ip6flow_lock)); + new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) * table_size, M_RTABLE, M_NOWAIT); @@ -202,6 +214,20 @@ ip6flow_init(int table_size) return 0; } +int +ip6flow_init(int table_size) +{ + int ret; + + mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE); + + mutex_enter(&ip6flow_lock); + ret = ip6flow_init_locked(table_size); + mutex_exit(&ip6flow_lock); + + return ret; +} + /* * IPv6 Fast Forward routine. Attempt to forward the packet - * if any problems are found return to the main IPv6 input @@ -216,35 +242,38 @@ ip6flow_fastforward(struct mbuf **mp) struct mbuf *m; const struct sockaddr *dst; int error; + int ret = 0; + + mutex_enter(&ip6flow_lock); /* * Are we forwarding packets and have flows? */ if (!ip6_forwarding || ip6flow_inuse == 0) - return 0; + goto out; m = *mp; /* * At least size of IPv6 Header? */ if (m->m_len < sizeof(struct ip6_hdr)) - return 0; + goto out; /* * Was packet received as a link-level multicast or broadcast? * If so, don't try to fast forward. 
*/ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0) - return 0; + goto out; if (IP6_HDR_ALIGNED_P(mtod(m, const void *)) == 0) { if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { - return 0; + goto out; } *mp = m; } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { - return 0; + goto out; } *mp = m; } @@ -253,7 +282,7 @@ ip6flow_fastforward(struct mbuf **mp) if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { /* Bad version. */ - return 0; + goto out; } /* @@ -261,14 +290,14 @@ ip6flow_fastforward(struct mbuf **mp) * We just leave this up to ip6_input to deal with. */ if (ip6->ip6_nxt == IPPROTO_HOPOPTS) - return 0; + goto out; /* * Attempt to find a flow. */ if ((ip6f = ip6flow_lookup(ip6)) == NULL) { /* No flow found. */ - return 0; + goto out; } /* @@ -277,14 +306,14 @@ ip6flow_fastforward(struct mbuf **mp) if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL || (rt->rt_ifp->if_flags & IFF_UP) == 0 || (rt->rt_flags & RTF_BLACKHOLE) != 0) - return 0; + goto out; /* * Packet size greater than MTU? */ if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) { /* Return to main IPv6 input function. */ - return 0; + goto out; } /* @@ -293,7 +322,7 @@ ip6flow_fastforward(struct mbuf **mp) m->m_pkthdr.csum_flags = 0; if (ip6->ip6_hlim <= IPV6_HLIMDEC) - return 0; + goto out; /* Decrement hop limit (same as TTL) */ ip6->ip6_hlim -= IPV6_HLIMDEC; @@ -315,7 +344,10 @@ ip6flow_fastforward(struct mbuf **mp) ip6f->ip6f_forwarded++; } KERNEL_UNLOCK_ONE(NULL); - return 1; + ret = 1; + out: + mutex_exit(&ip6flow_lock); + return ret; } /* @@ -347,6 +379,8 @@ ip6flow_free(struct ip6flow *ip6f) { int s; + KASSERT(mutex_owned(&ip6flow_lock)); + /* * Remove the flow from the hash table (at elevated IPL). 
* Once it's off the list, we can deal with it at normal @@ -361,13 +395,12 @@ ip6flow_free(struct ip6flow *ip6f) pool_put(&ip6flow_pool, ip6f); } -/* - * Reap one or more flows - ip6flow_reap may remove - * multiple flows if net.inet6.ip6.maxflows is reduced. - */ -struct ip6flow * -ip6flow_reap(int just_one) +static struct ip6flow * +ip6flow_reap_locked(int just_one) { + + KASSERT(mutex_owned(&ip6flow_lock)); + while (just_one || ip6flow_inuse > ip6_maxflows) { struct ip6flow *ip6f, *maybe_ip6f = NULL; int s; @@ -414,12 +447,28 @@ ip6flow_reap(int just_one) return NULL; } +/* + * Reap one or more flows - ip6flow_reap may remove + * multiple flows if net.inet6.ip6.maxflows is reduced. + */ +struct ip6flow * +ip6flow_reap(int just_one) +{ + struct ip6flow *ip6f; + + mutex_enter(&ip6flow_lock); + ip6f = ip6flow_reap_locked(just_one); + mutex_exit(&ip6flow_lock); + return ip6f; +} + void ip6flow_slowtimo(void) { struct ip6flow *ip6f, *next_ip6f; mutex_enter(softnet_lock); + mutex_enter(&ip6flow_lock); KERNEL_LOCK(1, NULL); for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { @@ -437,6 +486,7 @@ ip6flow_slowtimo(void) } KERNEL_UNLOCK_ONE(NULL); + mutex_exit(&ip6flow_lock); mutex_exit(softnet_lock); } @@ -452,6 +502,8 @@ ip6flow_create(const struct route *ro, s size_t hash; int s; + mutex_enter(&ip6flow_lock); + ip6 = mtod(m, const struct ip6_hdr *); /* @@ -460,8 +512,10 @@ ip6flow_create(const struct route *ro, s * * Don't create a flow for ICMPv6 messages. 
*/ - if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP) + if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP) { + mutex_exit(&ip6flow_lock); return; + } KERNEL_LOCK(1, NULL); @@ -479,7 +533,7 @@ ip6flow_create(const struct route *ro, s ip6f = ip6flow_lookup(ip6); if (ip6f == NULL) { if (ip6flow_inuse >= ip6_maxflows) { - ip6f = ip6flow_reap(1); + ip6f = ip6flow_reap_locked(1); } else { ip6f = pool_get(&ip6flow_pool, PR_NOWAIT); if (ip6f == NULL) @@ -518,6 +572,7 @@ ip6flow_create(const struct route *ro, s out: KERNEL_UNLOCK_ONE(NULL); + mutex_exit(&ip6flow_lock); } /* @@ -531,6 +586,9 @@ ip6flow_invalidate_all(int new_size) int s, error; error = 0; + + mutex_enter(&ip6flow_lock); + s = splnet(); for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { next_ip6f = LIST_NEXT(ip6f, ip6f_list); @@ -538,8 +596,10 @@ ip6flow_invalidate_all(int new_size) } if (new_size) - error = ip6flow_init(new_size); + error = ip6flow_init_locked(new_size); splx(s); + mutex_exit(&ip6flow_lock); + return error; }