Module Name: src Committed By: knakahara Date: Tue Aug 23 09:59:20 UTC 2016
Modified Files: src/sys/netinet6: ip6_flow.c ip6_var.h Log Message: improve fast-forward performance when the number of flows exceeds ip6_maxflows. This is porting of ip_flow.c:r1.76 In ip6flow case, the before degradation is about 45%, the after degradation is bout 55%. To generate a diff of this commit: cvs rdiff -u -r1.30 -r1.31 src/sys/netinet6/ip6_flow.c cvs rdiff -u -r1.67 -r1.68 src/sys/netinet6/ip6_var.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/netinet6/ip6_flow.c diff -u src/sys/netinet6/ip6_flow.c:1.30 src/sys/netinet6/ip6_flow.c:1.31 --- src/sys/netinet6/ip6_flow.c:1.30 Tue Aug 2 04:50:16 2016 +++ src/sys/netinet6/ip6_flow.c Tue Aug 23 09:59:20 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip6_flow.c,v 1.30 2016/08/02 04:50:16 knakahara Exp $ */ +/* $NetBSD: ip6_flow.c,v 1.31 2016/08/23 09:59:20 knakahara Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.30 2016/08/02 04:50:16 knakahara Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.31 2016/08/23 09:59:20 knakahara Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -81,7 +81,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v static struct pool ip6flow_pool; -LIST_HEAD(ip6flowhead, ip6flow); +TAILQ_HEAD(ip6flowhead, ip6flow); /* * We could use IPv4 defines (IPFLOW_HASHBITS) but we'll @@ -113,19 +113,20 @@ static void ip6flow_sysctl_init(struct s /* * Insert an ip6flow into the list. */ -#define IP6FLOW_INSERT(bucket, ip6f) \ +#define IP6FLOW_INSERT(hashidx, ip6f) \ do { \ - LIST_INSERT_HEAD((bucket), (ip6f), ip6f_hash); \ - LIST_INSERT_HEAD(&ip6flowlist, (ip6f), ip6f_list); \ + (ip6f)->ip6f_hashidx = (hashidx); \ + TAILQ_INSERT_HEAD(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \ + TAILQ_INSERT_HEAD(&ip6flowlist, (ip6f), ip6f_list); \ } while (/*CONSTCOND*/ 0) /* * Remove an ip6flow from the list. */ -#define IP6FLOW_REMOVE(ip6f) \ +#define IP6FLOW_REMOVE(hashidx, ip6f) \ do { \ - LIST_REMOVE((ip6f), ip6f_hash); \ - LIST_REMOVE((ip6f), ip6f_list); \ + TAILQ_REMOVE(&ip6flowtable[(hashidx)], (ip6f), ip6f_hash); \ + TAILQ_REMOVE(&ip6flowlist, (ip6f), ip6f_list); \ } while (/*CONSTCOND*/ 0) #ifndef IP6FLOW_DEFAULT @@ -171,7 +172,7 @@ ip6flow_lookup(const struct ip6_hdr *ip6 hash = ip6flow_hash(ip6); - LIST_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) { + TAILQ_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) { if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6f->ip6f_dst) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6f->ip6f_src) && ip6f->ip6f_flow == ip6->ip6_flow) { @@ -217,9 +218,9 @@ ip6flow_init_locked(int table_size) ip6flowtable = new_table; ip6_hashsize = table_size; - LIST_INIT(&ip6flowlist); + TAILQ_INIT(&ip6flowlist); for (i = 0; i < ip6_hashsize; i++) - LIST_INIT(&ip6flowtable[i]); + TAILQ_INIT(&ip6flowtable[i]); return 0; } @@ -352,6 +353,15 @@ ip6flow_fastforward(struct mbuf **mp) ip6f->ip6f_uses++; +#if 0 + /* + * We use FIFO cache replacement instead of LRU the same ip_flow.c. + */ + /* move to head (LRU) for ip6flowlist. ip6flowtable does not care LRU. */ + TAILQ_REMOVE(&ip6flowlist, ip6f, ip6f_list); + TAILQ_INSERT_HEAD(&ip6flowlist, ip6f, ip6f_list); +#endif + /* Send on its way - straight to the interface output routine. */ if ((error = if_output_lock(rt->rt_ifp, rt->rt_ifp, m, dst, rt)) != 0) { ip6f->ip6f_dropped++; @@ -399,7 +409,7 @@ ip6flow_free(struct ip6flow *ip6f) * Once it's off the list, we can deal with it at normal * network IPL. */ - IP6FLOW_REMOVE(ip6f); + IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f); ip6flow_inuse--; ip6flow_addstats(ip6f); @@ -410,14 +420,34 @@ ip6flow_free(struct ip6flow *ip6f) static struct ip6flow * ip6flow_reap_locked(int just_one) { + struct ip6flow *ip6f; KASSERT(mutex_owned(&ip6flow_lock)); - while (just_one || ip6flow_inuse > ip6_maxflows) { - struct ip6flow *ip6f, *maybe_ip6f = NULL; + /* + * This case must remove one ip6flow. Furthermore, this case is used in + * fast path(packet processing path). So, simply remove TAILQ_LAST one. + */ + if (just_one) { + ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead); + KASSERT(ip6f != NULL); + + IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f); + + ip6flow_addstats(ip6f); + rtcache_free(&ip6f->ip6f_ro); + return ip6f; + } - ip6f = LIST_FIRST(&ip6flowlist); - while (ip6f != NULL) { + /* + * This case is used in slow path(sysctl). + * At first, remove invalid rtcache ip6flow, and then remove TAILQ_LAST + * ip6flow if it is ensured least recently used by comparing last_uses. + */ + while (ip6flow_inuse > ip6_maxflows) { + struct ip6flow *maybe_ip6f = TAILQ_LAST(&ip6flowlist, ip6flowhead); + + TAILQ_FOREACH(ip6f, &ip6flowlist, ip6f_list) { /* * If this no longer points to a valid route - * reclaim it. @@ -429,27 +459,20 @@ ip6flow_reap_locked(int just_one) * used or has had the least uses in the * last 1.5 intervals. */ - if (maybe_ip6f == NULL || - ip6f->ip6f_timer < maybe_ip6f->ip6f_timer || - (ip6f->ip6f_timer == maybe_ip6f->ip6f_timer && - ip6f->ip6f_last_uses + ip6f->ip6f_uses < - maybe_ip6f->ip6f_last_uses + - maybe_ip6f->ip6f_uses)) + if (ip6f->ip6f_timer < maybe_ip6f->ip6f_timer + || ((ip6f->ip6f_timer == maybe_ip6f->ip6f_timer) + && (ip6f->ip6f_last_uses + ip6f->ip6f_uses + < maybe_ip6f->ip6f_last_uses + maybe_ip6f->ip6f_uses))) maybe_ip6f = ip6f; - ip6f = LIST_NEXT(ip6f, ip6f_list); } ip6f = maybe_ip6f; done: /* * Remove the entry from the flow table */ - IP6FLOW_REMOVE(ip6f); + IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f); rtcache_free(&ip6f->ip6f_ro); - if (just_one) { - ip6flow_addstats(ip6f); - return ip6f; - } ip6flow_inuse--; ip6flow_addstats(ip6f); pool_put(&ip6flow_pool, ip6f); @@ -486,8 +509,8 @@ ip6flow_slowtimo_work(struct work *wk, v mutex_enter(&ip6flow_lock); KERNEL_LOCK(1, NULL); - for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { - next_ip6f = LIST_NEXT(ip6f, ip6f_list); + for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { + next_ip6f = TAILQ_NEXT(ip6f, ip6f_list); if (PRT_SLOW_ISEXPIRED(ip6f->ip6f_timer) || rtcache_validate(&ip6f->ip6f_ro) == NULL) { ip6flow_free(ip6f); @@ -567,7 +590,7 @@ ip6flow_create(const struct route *ro, s } memset(ip6f, 0, sizeof(*ip6f)); } else { - IP6FLOW_REMOVE(ip6f); + IP6FLOW_REMOVE(ip6f->ip6f_hashidx, ip6f); ip6flow_addstats(ip6f); rtcache_free(&ip6f->ip6f_ro); @@ -590,7 +613,7 @@ ip6flow_create(const struct route *ro, s * Insert into the approriate bucket of the flow table. */ hash = ip6flow_hash(ip6); - IP6FLOW_INSERT(&ip6flowtable[hash], ip6f); + IP6FLOW_INSERT(hash, ip6f); out: KERNEL_UNLOCK_ONE(NULL); @@ -611,8 +634,8 @@ ip6flow_invalidate_all(int new_size) mutex_enter(&ip6flow_lock); - for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { - next_ip6f = LIST_NEXT(ip6f, ip6f_list); + for (ip6f = TAILQ_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) { + next_ip6f = TAILQ_NEXT(ip6f, ip6f_list); ip6flow_free(ip6f); } Index: src/sys/netinet6/ip6_var.h diff -u src/sys/netinet6/ip6_var.h:1.67 src/sys/netinet6/ip6_var.h:1.68 --- src/sys/netinet6/ip6_var.h:1.67 Tue Jun 21 10:25:27 2016 +++ src/sys/netinet6/ip6_var.h Tue Aug 23 09:59:20 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip6_var.h,v 1.67 2016/06/21 10:25:27 ozaki-r Exp $ */ +/* $NetBSD: ip6_var.h,v 1.68 2016/08/23 09:59:20 knakahara Exp $ */ /* $KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $ */ /* @@ -235,8 +235,9 @@ struct ip6_pktopts { * Structure for an IPv6 flow (ip6_fastforward). */ struct ip6flow { - LIST_ENTRY(ip6flow) ip6f_list; /* next in active list */ - LIST_ENTRY(ip6flow) ip6f_hash; /* next ip6flow in bucket */ + TAILQ_ENTRY(ip6flow) ip6f_list; /* next in active list */ + TAILQ_ENTRY(ip6flow) ip6f_hash; /* next ip6flow in bucket */ + size_t ip6f_hashidx; /* own hash index of ipflowtable[] */ struct in6_addr ip6f_dst; /* destination address */ struct in6_addr ip6f_src; /* source address */ struct route ip6f_ro; /* associated route entry */