Giuseppe Scrivano <[email protected]> wrote:
> Florian Westphal <[email protected]> writes:
> 
> > Giuseppe Scrivano <[email protected]> wrote:
> >> SELinux, if enabled, registers for each new network namespace 6
> >> netfilter hooks.  Avoid to use synchronize_net for each new hook, but do
> >> it once after all the hooks are added.  The net benefit on an SMP
> >> machine with two cores is that creating a new network namespace takes
> >> -40% of the original time.
> >
> > but this needs more work.
> >
> >> Signed-off-by: Giuseppe Scrivano <[email protected]>
> >> ---
> >>  net/netfilter/core.c | 15 ++++++++++++---
> >>  1 file changed, 12 insertions(+), 3 deletions(-)
> >> 
> >> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> >> index 52cd2901a097..beeb0b36f429 100644
> >> --- a/net/netfilter/core.c
> >> +++ b/net/netfilter/core.c
> >> @@ -252,7 +252,7 @@ static struct nf_hook_entries __rcu 
> >> **nf_hook_entry_head(struct net *net, const
> >>    return NULL;
> >>  }
> >>  
> >> -int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
> >> +static int __nf_register_net_hook(struct net *net, const struct 
> >> nf_hook_ops *reg)
> >
> > Change this to return struct nf_hook_entries *
> 
> thanks for the quick review.  Are you fine if I change it to:
> 
> static int __nf_register_net_hook(struct net *net, const struct nf_hook_ops 
> *reg, struct nf_hook_entries **old)

You can use ERR_PTR/PTR_ERR instead.

However, I suggest you try to go with call_rcu to get rid of all of the
synchronize_net() calls. I don't even see why its still needed for
nfqueue case provided we invoke the nfqueue drop handler first.

PoC example, untested:

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -77,6 +77,11 @@ struct nf_hook_entry {
        void                            *priv;
 };
 
+struct nf_hook_entries_rcu_head                {
+       struct rcu_head head;
+       void    *allocation;
+};
+
 struct nf_hook_entries {
        u16                             num_hook_entries;
        /* padding */
@@ -87,10 +92,13 @@ struct nf_hook_entries {
         * This is not part of struct nf_hook_entry since its only
         * needed in slow path (hook register/unregister).
         *
-        * const struct nf_hook_ops     *orig_ops[]
+        * const struct nf_hook_ops     *orig_ops[];
+        * struct nf_hook_entries_rcu_head;
         */
 };
 
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e);
+
 static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct 
nf_hook_entries *e)
 {
        unsigned int n = e->num_hook_entries;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -69,22 +69,61 @@ static DEFINE_MUTEX(nf_hook_mutex);
 #define nf_entry_dereference(e) \
        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
+static struct nf_hook_entries_rcu_head *
+nf_hook_entries_rcu_head_get(struct nf_hook_entries *e)
+{
+       unsigned int num = e->num_hook_entries;
+       struct nf_hook_entries_rcu_head *head;
+       static struct nf_hook_ops **ops;
+
+       ops = nf_hook_entries_get_hook_ops(e);
+       head = (void *)&ops[num];
+
+       return head;
+}
+
 static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 {
        struct nf_hook_entries *e;
        size_t alloc = sizeof(*e) +
                       sizeof(struct nf_hook_entry) * num +
-                      sizeof(struct nf_hook_ops *) * num;
+                      sizeof(struct nf_hook_ops *) * num +
+                      sizeof(struct nf_hook_entries_rcu_head);
 
        if (num == 0)
                return NULL;
 
        e = kvzalloc(alloc, GFP_KERNEL);
-       if (e)
+       if (e) {
+               struct nf_hook_entries_rcu_head *head;
+
                e->num_hook_entries = num;
+
+               head = nf_hook_entries_rcu_head_get(e);
+               head->allocation = e;
+       }
        return e;
 }
 
+void __nf_hook_entries_free(struct rcu_head *h)
+{
+       struct nf_hook_entries_rcu_head *head;
+
+       head = container_of(h, struct nf_hook_entries_rcu_head, head);
+       kvfree(head->allocation);
+}
+
+void nf_hook_entries_free_rcu(struct nf_hook_entries *e)
+{
+       struct nf_hook_entries_rcu_head *head;
+
+       head = nf_hook_entries_rcu_head_get(e);
+       if (WARN_ON(e != head->allocation))
+               return;
+
+       call_rcu(&head->head, __nf_hook_entries_free);
+}
+
 static unsigned int accept_all(void *priv,
                               struct sk_buff *skb,
                               const struct nf_hook_state *state)
@@ -291,9 +330,9 @@ int nf_register_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 #ifdef HAVE_JUMP_LABEL
        static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
-       synchronize_net();
        BUG_ON(p == new_hooks);
-       kvfree(p);
+       if (p)
+               nf_hook_entries_free_rcu(p);
        return 0;
 }
 EXPORT_SYMBOL(nf_register_net_hook);
@@ -341,7 +380,6 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
 {
        struct nf_hook_entries __rcu **pp;
        struct nf_hook_entries *p;
-       unsigned int nfq;
 
        pp = nf_hook_entry_head(net, reg);
        if (!pp)
@@ -362,13 +400,8 @@ void nf_unregister_net_hook(struct net *net, const struct 
nf_hook_ops *reg)
        if (!p)
                return;
 
-       synchronize_net();
-
-       /* other cpu might still process nfqueue verdict that used reg */
-       nfq = nf_queue_nf_hook_drop(net);
-       if (nfq)
-               synchronize_net();
-       kvfree(p);
+       nf_queue_nf_hook_drop(net);
+       nf_hook_entries_free_rcu(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -395,63 +428,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
                             unsigned int hookcount)
 {
-       struct nf_hook_entries *to_free[16], *p;
-       struct nf_hook_entries __rcu **pp;
-       unsigned int i, j, n;
-
-       mutex_lock(&nf_hook_mutex);
-       for (i = 0; i < hookcount; i++) {
-               pp = nf_hook_entry_head(net, &reg[i]);
-               if (!pp)
-                       continue;
-
-               p = nf_entry_dereference(*pp);
-               if (WARN_ON_ONCE(!p))
-                       continue;
-               __nf_unregister_net_hook(p, &reg[i]);
-       }
-       mutex_unlock(&nf_hook_mutex);
-
-       do {
-               n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
-               mutex_lock(&nf_hook_mutex);
-
-               for (i = 0, j = 0; i < hookcount && j < n; i++) {
-                       pp = nf_hook_entry_head(net, &reg[i]);
-                       if (!pp)
-                               continue;
-
-                       p = nf_entry_dereference(*pp);
-                       if (!p)
-                               continue;
-
-                       to_free[j] = __nf_hook_entries_try_shrink(pp);
-                       if (to_free[j])
-                               ++j;
-               }
-
-               mutex_unlock(&nf_hook_mutex);
-
-               if (j) {
-                       unsigned int nfq;
-
-                       synchronize_net();
-
-                       /* need 2nd synchronize_net() if nfqueue is used, skb
-                        * can get reinjected right before nf_queue_hook_drop()
-                        */
-                       nfq = nf_queue_nf_hook_drop(net);
-                       if (nfq)
-                               synchronize_net();
-
-                       for (i = 0; i < j; i++)
-                               kvfree(to_free[i]);
-               }
+       unsigned int i;
 
-               reg += n;
-               hookcount -= n;
-       } while (hookcount > 0);
+       for (i = 0; i < hookcount; i++)
+               nf_unregister_net_hook(net, &reg[i]);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to