> -----Original Message----- > From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org] > Subject: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache > > retain last used xfrm_dst in a pcpu cache. > On next request, reuse this dst if the policies are the same. > > The cache does'nt help at all with strictly-RR workloads as > we never have a hit. > > Also, the cache adds cost of this_cpu_xchg() in packet path. > It would be better to use plain this_cpu_read/write, however, > a netdev notifier can run in parallel on other cpu and write same > pcpu value so the xchg is needed to avoid race. > > The notifier is needed so we do not add long hangs when a device > is dismantled but some pcpu xdst still holds a reference. > > Test results using 4 network namespaces and null encryption: > > ns1 ns2 -> ns3 -> ns4 > netperf -> xfrm/null enc -> xfrm/null dec -> netserver > > what TCP_STREAM UDP_STREAM UDP_RR > Flow cache: 14804.4 279.738 326213.0 > No flow cache: 14158.3 257.458 228486.8 > Pcpu cache: 14766.4 286.958 239433.5 > > UDP tests used 64byte packets, tests ran for one minute each, > value is average over ten iterations.
Hi Florian, I want to give this a go with hw-offload and see the impact on performance. It may take us a few days to do that. See one comment below. > > 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this > series but without this one. > > Signed-off-by: Florian Westphal <f...@strlen.de> > --- > include/net/xfrm.h | 1 + > net/xfrm/xfrm_device.c | 1 + > net/xfrm/xfrm_policy.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 46 insertions(+) > > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > index 9b85367529a4..8bde1d569790 100644 > --- a/include/net/xfrm.h > +++ b/include/net/xfrm.h > @@ -316,6 +316,7 @@ int xfrm_policy_register_afinfo(const struct > xfrm_policy_afinfo *afinfo, int fam > void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo > *afinfo); > void km_policy_notify(struct xfrm_policy *xp, int dir, > const struct km_event *c); > +void xfrm_policy_dev_unreg(void); > void km_state_notify(struct xfrm_state *x, const struct km_event *c); > > struct xfrm_tmpl; > diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c > index d01cb256e89c..8221d05d43d1 100644 > --- a/net/xfrm/xfrm_device.c > +++ b/net/xfrm/xfrm_device.c > @@ -151,6 +151,7 @@ static int xfrm_dev_register(struct net_device *dev) > > static int xfrm_dev_unregister(struct net_device *dev) > { > + xfrm_policy_dev_unreg(); > return NOTIFY_DONE; > } > > diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c > index f4419d1b9f38..ac83b39850ce 100644 > --- a/net/xfrm/xfrm_policy.c > +++ b/net/xfrm/xfrm_policy.c > @@ -44,6 +44,7 @@ struct xfrm_flo { > u8 flags; > }; > > +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); > static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); > static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 > + 1] > __read_mostly; > @@ -1700,6 +1701,34 @@ static int xfrm_expand_policies(const struct flowi > *fl, u16 family, > > } > > +void xfrm_policy_dev_unreg(void) Maybe name it xfrm_policy_cache_flush() or something similar, and call it from some places where xfrm_garbage_collect() used to be called? Such as from xfrm_policy_flush() And maybe even from xfrm_flush_sa() as well This would allow to unload esp4 and/or esp4_offload (or other algo module) after 'ip x s f' (or the swan equivalent) > +{ > + int cpu; > + > + local_bh_disable(); > + rcu_read_lock(); > + for_each_possible_cpu(cpu) { > + struct xfrm_dst *tmp, *old; > + > + old = per_cpu(xfrm_last_dst, cpu); > + if (!old || xfrm_bundle_ok(old)) > + continue; > + > + tmp = cmpxchg(&(per_cpu(xfrm_last_dst, cpu)), old, NULL); > + if (tmp == old) > + dst_release(&old->u.dst); > + } > + rcu_read_unlock(); > + local_bh_enable(); > +} > + > +static void xfrm_last_dst_update(struct xfrm_dst *xdst) > +{ > + struct xfrm_dst *old = this_cpu_xchg(xfrm_last_dst, xdst); > + if (old) > + dst_release(&old->u.dst); > +} > + > static struct xfrm_dst * > xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, > const struct flowi *fl, u16 family, > @@ -1711,17 +1740,29 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy > **pols, int num_pols, > struct xfrm_dst *xdst; > int err; > > + xdst = this_cpu_read(xfrm_last_dst); > + if (xdst && > + xdst->u.dst.dev == dst_orig->dev && > + xdst->num_pols == num_pols && > + memcmp(xdst->pols, pols, > + sizeof(struct xfrm_policy *) * num_pols) == 0 && > + xfrm_bundle_ok(xdst) && > + dst_hold_safe(&xdst->u.dst)) > + return xdst; > + > /* Try to instantiate a bundle */ > err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); > if (err <= 0) { > if (err != 0 && err != -EAGAIN) > XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); > + xfrm_last_dst_update(NULL); > return ERR_PTR(err); > } > > dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); > if (IS_ERR(dst)) { > XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); > + xfrm_last_dst_update(NULL); > return ERR_CAST(dst); > } > > @@ -1731,6 +1772,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy > **pols, int num_pols, > memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); > xdst->policy_genid = atomic_read(&pols[0]->genid); > > + atomic_set(&xdst->u.dst.__refcnt, 2); > + xfrm_last_dst_update(xdst); > + > return xdst; > } > > -- > 2.13.0