> -----Original Message-----
> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org]
> Subject: [RFC net-next 9/9] xfrm: add a small xdst pcpu cache
> 
> retain last used xfrm_dst in a pcpu cache.
> On next request, reuse this dst if the policies are the same.
> 
> The cache doesn't help at all with strictly-RR workloads as
> we never have a hit.
> 
> Also, the cache adds cost of this_cpu_xchg() in packet path.
> It would be better to use plain this_cpu_read/write, however,
> a netdev notifier can run in parallel on other cpu and write same
> pcpu value so the xchg is needed to avoid race.
> 
> The notifier is needed so we do not add long hangs when a device
> is dismantled but some pcpu xdst still holds a reference.
> 
> Test results using 4 network namespaces and null encryption:
> 
> ns1           ns2          -> ns3           -> ns4
> netperf -> xfrm/null enc   -> xfrm/null dec -> netserver
> 
> what                    TCP_STREAM      UDP_STREAM      UDP_RR
> Flow cache:           14804.4         279.738         326213.0
> No flow cache:        14158.3         257.458         228486.8
> Pcpu cache:           14766.4         286.958         239433.5
> 
> UDP tests used 64-byte packets, tests ran for one minute each,
> value is the average over ten iterations.

Hi Florian,

I want to give this a go with hw-offload and see the impact on performance.
It may take us a few days to do that.

See one comment below.

> 
> 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this
> series but without this one.
> 
> Signed-off-by: Florian Westphal <f...@strlen.de>
> ---
>  include/net/xfrm.h     |  1 +
>  net/xfrm/xfrm_device.c |  1 +
>  net/xfrm/xfrm_policy.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 46 insertions(+)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 9b85367529a4..8bde1d569790 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -316,6 +316,7 @@ int xfrm_policy_register_afinfo(const struct
> xfrm_policy_afinfo *afinfo, int fam
>  void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo
> *afinfo);
>  void km_policy_notify(struct xfrm_policy *xp, int dir,
>                     const struct km_event *c);
> +void xfrm_policy_dev_unreg(void);
>  void km_state_notify(struct xfrm_state *x, const struct km_event *c);
> 
>  struct xfrm_tmpl;
> diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
> index d01cb256e89c..8221d05d43d1 100644
> --- a/net/xfrm/xfrm_device.c
> +++ b/net/xfrm/xfrm_device.c
> @@ -151,6 +151,7 @@ static int xfrm_dev_register(struct net_device *dev)
> 
>  static int xfrm_dev_unregister(struct net_device *dev)
>  {
> +     xfrm_policy_dev_unreg();
>       return NOTIFY_DONE;
>  }
> 
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index f4419d1b9f38..ac83b39850ce 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -44,6 +44,7 @@ struct xfrm_flo {
>       u8 flags;
>  };
> 
> +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
>  static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
>  static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6
> + 1]
>                                               __read_mostly;
> @@ -1700,6 +1701,34 @@ static int xfrm_expand_policies(const struct flowi
> *fl, u16 family,
> 
>  }
> 
> +void xfrm_policy_dev_unreg(void)

Maybe name it xfrm_policy_cache_flush() or something similar, and call it from 
some places where xfrm_garbage_collect() used to be called?

Such as from xfrm_policy_flush()
And maybe even from xfrm_flush_sa() as well

This would allow to unload esp4 and/or esp4_offload (or other algo module) 
after 'ip x s f' (or the swan equivalent)

> +{
> +     int cpu;
> +
> +     local_bh_disable();
> +     rcu_read_lock();
> +     for_each_possible_cpu(cpu) {
> +             struct xfrm_dst *tmp, *old;
> +
> +             old = per_cpu(xfrm_last_dst, cpu);
> +             if (!old || xfrm_bundle_ok(old))
> +                     continue;
> +
> +             tmp = cmpxchg(&(per_cpu(xfrm_last_dst, cpu)), old, NULL);
> +             if (tmp == old)
> +                     dst_release(&old->u.dst);
> +     }
> +     rcu_read_unlock();
> +     local_bh_enable();
> +}
> +
> +static void xfrm_last_dst_update(struct xfrm_dst *xdst)
> +{
> +     struct xfrm_dst *old = this_cpu_xchg(xfrm_last_dst, xdst);
> +     if (old)
> +             dst_release(&old->u.dst);
> +}
> +
>  static struct xfrm_dst *
>  xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
>                              const struct flowi *fl, u16 family,
> @@ -1711,17 +1740,29 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy
> **pols, int num_pols,
>       struct xfrm_dst *xdst;
>       int err;
> 
> +     xdst = this_cpu_read(xfrm_last_dst);
> +     if (xdst &&
> +         xdst->u.dst.dev == dst_orig->dev &&
> +         xdst->num_pols == num_pols &&
> +         memcmp(xdst->pols, pols,
> +                sizeof(struct xfrm_policy *) * num_pols) == 0 &&
> +         xfrm_bundle_ok(xdst) &&
> +         dst_hold_safe(&xdst->u.dst))
> +             return xdst;
> +
>       /* Try to instantiate a bundle */
>       err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
>       if (err <= 0) {
>               if (err != 0 && err != -EAGAIN)
>                       XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
> +             xfrm_last_dst_update(NULL);
>               return ERR_PTR(err);
>       }
> 
>       dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
>       if (IS_ERR(dst)) {
>               XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
> +             xfrm_last_dst_update(NULL);
>               return ERR_CAST(dst);
>       }
> 
> @@ -1731,6 +1772,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy
> **pols, int num_pols,
>       memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
>       xdst->policy_genid = atomic_read(&pols[0]->genid);
> 
> +     atomic_set(&xdst->u.dst.__refcnt, 2);
> +     xfrm_last_dst_update(xdst);
> +
>       return xdst;
>  }
> 
> --
> 2.13.0

Reply via email to