Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.even...@gmail.com>
Signed-off-by: Florian Westphal <f...@strlen.de>
---
 v2: rebase on top of current ipsec-next

 include/net/xfrm.h     |   1 -
 net/xfrm/xfrm_device.c |  10 ----
 net/xfrm/xfrm_policy.c | 139 +------------------------------------------------
 net/xfrm/xfrm_state.c  |   5 +-
 4 files changed, 3 insertions(+), 152 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3fa578a6a819..a5378613a49c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -332,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct 
xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
                      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 16c1230d20fa..11d56a44e9e8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -307,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev)
        return xfrm_api_check(dev);
 }
 
-static int xfrm_dev_unregister(struct net_device *dev)
-{
-       xfrm_policy_cache_flush();
-       return NOTIFY_DONE;
-}
-
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
        return xfrm_api_check(dev);
@@ -323,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev)
        if (dev->features & NETIF_F_HW_ESP)
                xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-       xfrm_policy_cache_flush();
        return NOTIFY_DONE;
 }
 
@@ -335,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, 
unsigned long event, void
        case NETDEV_REGISTER:
                return xfrm_dev_register(dev);
 
-       case NETDEV_UNREGISTER:
-               return xfrm_dev_unregister(dev);
-
        case NETDEV_FEAT_CHANGE:
                return xfrm_dev_feat_change(dev);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d960ea6657b5..ef75891450e7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,8 +45,6 @@ struct xfrm_flo {
        u8 flags;
 };
 
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_if_cb_lock);
 static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
 
@@ -1732,108 +1730,6 @@ static int xfrm_expand_policies(const struct flowi *fl, 
u16 family,
 
 }
 
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
-       this_cpu_write(xfrm_last_dst, xdst);
-       if (old)
-               dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
-       struct xfrm_dst *old;
-
-       old = this_cpu_read(xfrm_last_dst);
-       if (old && !xfrm_bundle_ok(old))
-               xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
-       local_bh_disable();
-       rcu_read_lock();
-       __xfrm_pcpu_work_fn();
-       rcu_read_unlock();
-       local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
-       struct xfrm_dst *old;
-       bool found = false;
-       int cpu;
-
-       might_sleep();
-
-       local_bh_disable();
-       rcu_read_lock();
-       for_each_possible_cpu(cpu) {
-               old = per_cpu(xfrm_last_dst, cpu);
-               if (old && !xfrm_bundle_ok(old)) {
-                       if (smp_processor_id() == cpu) {
-                               __xfrm_pcpu_work_fn();
-                               continue;
-                       }
-                       found = true;
-                       break;
-               }
-       }
-
-       rcu_read_unlock();
-       local_bh_enable();
-
-       if (!found)
-               return;
-
-       get_online_cpus();
-
-       for_each_possible_cpu(cpu) {
-               bool bundle_release;
-
-               rcu_read_lock();
-               old = per_cpu(xfrm_last_dst, cpu);
-               bundle_release = old && !xfrm_bundle_ok(old);
-               rcu_read_unlock();
-
-               if (!bundle_release)
-                       continue;
-
-               if (cpu_online(cpu)) {
-                       schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
-                       continue;
-               }
-
-               rcu_read_lock();
-               old = per_cpu(xfrm_last_dst, cpu);
-               if (old && !xfrm_bundle_ok(old)) {
-                       per_cpu(xfrm_last_dst, cpu) = NULL;
-                       dst_release(&old->u.dst);
-               }
-               rcu_read_unlock();
-       }
-
-       put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
-                               struct xfrm_state * const xfrm[],
-                               int num)
-{
-       const struct dst_entry *dst = &xdst->u.dst;
-       int i;
-
-       if (xdst->num_xfrms != num)
-               return false;
-
-       for (i = 0; i < num; i++) {
-               if (!dst || dst->xfrm != xfrm[i])
-                       return false;
-               dst = xfrm_dst_child(dst);
-       }
-
-       return xfrm_bundle_ok(xdst);
-}
-
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
@@ -1842,7 +1738,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, 
int num_pols,
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
-       struct xfrm_dst *xdst, *old;
+       struct xfrm_dst *xdst;
        struct dst_entry *dst;
        int err;
 
@@ -1854,22 +1750,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy 
**pols, int num_pols,
                return ERR_PTR(err);
        }
 
-       xdst = this_cpu_read(xfrm_last_dst);
-       if (xdst &&
-           xdst->u.dst.dev == dst_orig->dev &&
-           xdst->num_pols == num_pols &&
-           memcmp(xdst->pols, pols,
-                  sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-           xfrm_xdst_can_reuse(xdst, xfrm, err)) {
-               dst_hold(&xdst->u.dst);
-               xfrm_pols_put(pols, num_pols);
-               while (err > 0)
-                       xfrm_state_put(xfrm[--err]);
-               return xdst;
-       }
-
-       old = xdst;
-
        dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
        if (IS_ERR(dst)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1882,9 +1762,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, 
int num_pols,
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);
 
-       atomic_set(&xdst->u.dst.__refcnt, 2);
-       xfrm_last_dst_update(xdst, old);
-
        return xdst;
 }
 
@@ -2085,11 +1962,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi 
*fl, u16 family, u8 dir,
        if (num_xfrms <= 0)
                goto make_dummy_bundle;
 
-       local_bh_disable();
        xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                              xflo->dst_orig);
-       local_bh_enable();
-
        if (IS_ERR(xdst)) {
                err = PTR_ERR(xdst);
                if (err == -EREMOTE) {
@@ -2181,11 +2055,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct 
dst_entry *dst_orig,
                                goto no_transform;
                        }
 
-                       local_bh_disable();
                        xdst = xfrm_resolve_and_create_bundle(
                                        pols, num_pols, fl,
                                        family, dst_orig);
-                       local_bh_enable();
 
                        if (IS_ERR(xdst)) {
                                xfrm_pols_put(pols, num_pols);
@@ -3035,15 +2907,6 @@ static struct pernet_operations __net_initdata 
xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-       int i;
-
-       xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
-                                      GFP_KERNEL);
-       BUG_ON(!xfrm_pcpu_work);
-
-       for (i = 0; i < NR_CPUS; i++)
-               INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
        register_pernet_subsys(&xfrm_net_ops);
        xfrm_dev_init();
        seqcount_init(&xfrm_policy_hash_generation);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3803b6813fc5..e04a510ec992 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -735,10 +735,9 @@ int xfrm_state_flush(struct net *net, u8 proto, bool 
task_valid)
        }
 out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-       if (cnt) {
+       if (cnt)
                err = 0;
-               xfrm_policy_cache_flush();
-       }
+
        return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
-- 
2.16.4

Reply via email to