All genl callbacks are serialized by genl-mutex. This can become
bottleneck in multi threaded case.
Following patch adds an parameter to genl_family so that a
particular family can get concurrent netlink callback without
genl_lock held.

As side effect this patch also fixes race between genl netlink
dump and genl_unregister.

Signed-off-by: Pravin B Shelar <[email protected]>
---
 include/linux/netlink.h  |    2 +
 include/net/genetlink.h  |    1 +
 net/netlink/af_netlink.c |   48 ++++++++++++++++++-----
 net/netlink/genetlink.c  |   96 ++++++++++++++++++++++++++++++++++++----------
 4 files changed, 117 insertions(+), 30 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0f628ff..5e2df32 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -230,6 +230,7 @@ struct netlink_callback {
        u16                     min_dump_alloc;
        unsigned int            prev_seq, seq;
        long                    args[6];
+       bool                    lockless;
 };
 
 struct netlink_notify {
@@ -254,6 +255,7 @@ struct netlink_dump_control {
        int (*done)(struct netlink_callback*);
        void *data;
        u16 min_dump_alloc;
+       bool lockless;
 };
 
 extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index ccb6888..135a3a4 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -48,6 +48,7 @@ struct genl_family {
        unsigned int            version;
        unsigned int            maxattr;
        bool                    netnsok;
+       bool                    lockless;
        int                     (*pre_doit)(struct genl_ops *ops,
                                            struct sk_buff *skb,
                                            struct genl_info *info);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b3025a6..c7311f4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -78,6 +78,7 @@ struct netlink_sock {
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
+       struct mutex            sock_cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        struct module           *module;
@@ -431,6 +432,7 @@ static int __netlink_create(struct net *net, struct socket 
*sock,
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
+       mutex_init(&nlk->sock_cb_mutex);
        init_waitqueue_head(&nlk->wait);
 
        sk->sk_destruct = netlink_sock_destruct;
@@ -1678,6 +1680,28 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int 
type, int len, int flags)
 }
 EXPORT_SYMBOL(__nlmsg_put);
 
+static void netlink_cb_lock(struct netlink_sock *nlk, struct netlink_callback 
*cb)
+{
+       if (!cb->lockless)
+               mutex_lock(nlk->cb_mutex);
+}
+
+static void netlink_cb_unlock(struct netlink_sock *nlk, struct 
netlink_callback *cb)
+{
+       if (!cb->lockless)
+               mutex_unlock(nlk->cb_mutex);
+}
+
+static void netlink_sock_lock(struct netlink_sock *nlk)
+{
+       mutex_lock(&nlk->sock_cb_mutex);
+}
+
+static void netlink_sock_unlock(struct netlink_sock *nlk)
+{
+       mutex_unlock(&nlk->sock_cb_mutex);
+}
+
 /*
  * It looks a bit ugly.
  * It would be better to create kernel thread.
@@ -1692,8 +1716,7 @@ static int netlink_dump(struct sock *sk)
        int len, err = -ENOBUFS;
        int alloc_size;
 
-       mutex_lock(nlk->cb_mutex);
-
+       netlink_sock_lock(nlk);
        cb = nlk->cb;
        if (cb == NULL) {
                err = -EINVAL;
@@ -1706,10 +1729,13 @@ static int netlink_dump(struct sock *sk)
        if (!skb)
                goto errout_skb;
 
+       /* Take family lock if required. */
+       netlink_cb_lock(nlk, cb);
        len = cb->dump(skb, cb);
 
        if (len > 0) {
-               mutex_unlock(nlk->cb_mutex);
+               netlink_cb_unlock(nlk, cb);
+               netlink_sock_unlock(nlk);
 
                if (sk_filter(sk, skb))
                        kfree_skb(skb);
@@ -1720,7 +1746,7 @@ static int netlink_dump(struct sock *sk)
 
        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
        if (!nlh)
-               goto errout_skb;
+               goto errout_unlock_cb;
 
        nl_dump_check_consistent(cb, nlh);
 
@@ -1734,13 +1760,16 @@ static int netlink_dump(struct sock *sk)
        if (cb->done)
                cb->done(cb);
        nlk->cb = NULL;
-       mutex_unlock(nlk->cb_mutex);
+       netlink_cb_unlock(nlk, cb);
+       netlink_sock_unlock(nlk);
 
        netlink_consume_callback(cb);
        return 0;
 
+errout_unlock_cb:
+       netlink_cb_unlock(nlk, cb);
 errout_skb:
-       mutex_unlock(nlk->cb_mutex);
+       netlink_sock_unlock(nlk);
        kfree_skb(skb);
        return err;
 }
@@ -1773,15 +1802,16 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff 
*skb,
        }
        nlk = nlk_sk(sk);
        /* A dump is in progress... */
-       mutex_lock(nlk->cb_mutex);
+       netlink_sock_lock(nlk);
        if (nlk->cb) {
-               mutex_unlock(nlk->cb_mutex);
+               netlink_sock_unlock(nlk);
                netlink_destroy_callback(cb);
                sock_put(sk);
                return -EBUSY;
        }
        nlk->cb = cb;
-       mutex_unlock(nlk->cb_mutex);
+       nlk->cb->lockless = control->lockless;
+       netlink_sock_unlock(nlk);
 
        ret = netlink_dump(sk);
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2cc7c1e..720f5b5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -16,10 +16,12 @@
 #include <linux/skbuff.h>
 #include <linux/mutex.h>
 #include <linux/bitmap.h>
+#include <linux/rwsem.h>
 #include <net/sock.h>
 #include <net/genetlink.h>
 
 static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
+static DECLARE_RWSEM(cb_lock);
 
 void genl_lock(void)
 {
@@ -41,6 +43,18 @@ int lockdep_genl_is_held(void)
 EXPORT_SYMBOL(lockdep_genl_is_held);
 #endif
 
+static void __genl_lock(struct genl_family *family)
+{
+       if (!family->lockless)
+               mutex_lock(&genl_mutex);
+}
+
+static void __genl_unlock(struct genl_family *family)
+{
+       if (!family->lockless)
+               mutex_unlock(&genl_mutex);
+}
+
 #define GENL_FAM_TAB_SIZE      16
 #define GENL_FAM_TAB_MASK      (GENL_FAM_TAB_SIZE - 1)
 
@@ -143,6 +157,7 @@ int genl_register_mc_group(struct genl_family *family,
 
        BUG_ON(grp->name[0] == '\0');
 
+       down_write(&cb_lock);
        genl_lock();
 
        /* special-case our own group */
@@ -213,6 +228,7 @@ int genl_register_mc_group(struct genl_family *family,
        genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp);
  out:
        genl_unlock();
+       up_write(&cb_lock);
        return err;
 }
 EXPORT_SYMBOL(genl_register_mc_group);
@@ -254,9 +270,11 @@ static void __genl_unregister_mc_group(struct genl_family 
*family,
 void genl_unregister_mc_group(struct genl_family *family,
                              struct genl_multicast_group *grp)
 {
+       down_write(&cb_lock);
        genl_lock();
        __genl_unregister_mc_group(family, grp);
        genl_unlock();
+       up_write(&cb_lock);
 }
 EXPORT_SYMBOL(genl_unregister_mc_group);
 
@@ -302,9 +320,11 @@ int genl_register_ops(struct genl_family *family, struct 
genl_ops *ops)
        if (ops->policy)
                ops->flags |= GENL_CMD_CAP_HASPOL;
 
+       down_write(&cb_lock);
        genl_lock();
        list_add_tail(&ops->ops_list, &family->ops_list);
        genl_unlock();
+       up_write(&cb_lock);
 
        genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
        err = 0;
@@ -333,16 +353,19 @@ int genl_unregister_ops(struct genl_family *family, 
struct genl_ops *ops)
 {
        struct genl_ops *rc;
 
+       down_write(&cb_lock);
        genl_lock();
        list_for_each_entry(rc, &family->ops_list, ops_list) {
                if (rc == ops) {
                        list_del(&ops->ops_list);
                        genl_unlock();
+                       up_write(&cb_lock);
                        genl_ctrl_event(CTRL_CMD_DELOPS, ops);
                        return 0;
                }
        }
        genl_unlock();
+       up_write(&cb_lock);
 
        return -ENOENT;
 }
@@ -372,6 +395,7 @@ int genl_register_family(struct genl_family *family)
        INIT_LIST_HEAD(&family->ops_list);
        INIT_LIST_HEAD(&family->mcast_groups);
 
+       down_write(&cb_lock);
        genl_lock();
 
        if (genl_family_find_byname(family->name)) {
@@ -393,7 +417,7 @@ int genl_register_family(struct genl_family *family)
                goto errout_locked;
        }
 
-       if (family->maxattr) {
+       if (family->maxattr && !family->lockless) {
                family->attrbuf = kmalloc((family->maxattr+1) *
                                        sizeof(struct nlattr *), GFP_KERNEL);
                if (family->attrbuf == NULL) {
@@ -405,6 +429,7 @@ int genl_register_family(struct genl_family *family)
 
        list_add_tail(&family->family_list, genl_family_chain(family->id));
        genl_unlock();
+       up_write(&cb_lock);
 
        genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
 
@@ -412,6 +437,7 @@ int genl_register_family(struct genl_family *family)
 
 errout_locked:
        genl_unlock();
+       up_write(&cb_lock);
 errout:
        return err;
 }
@@ -475,6 +501,7 @@ int genl_unregister_family(struct genl_family *family)
 {
        struct genl_family *rc;
 
+       down_write(&cb_lock);
        genl_lock();
 
        genl_unregister_mc_groups(family);
@@ -486,6 +513,7 @@ int genl_unregister_family(struct genl_family *family)
                list_del(&rc->family_list);
                INIT_LIST_HEAD(&family->ops_list);
                genl_unlock();
+               up_write(&cb_lock);
 
                kfree(family->attrbuf);
                genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
@@ -493,6 +521,7 @@ int genl_unregister_family(struct genl_family *family)
        }
 
        genl_unlock();
+       up_write(&cb_lock);
 
        return -ENOENT;
 }
@@ -529,19 +558,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
 }
 EXPORT_SYMBOL(genlmsg_put);
 
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int genl_family_rcv_msg(struct genl_family *family,
+                              struct sk_buff *skb,
+                              struct nlmsghdr *nlh)
 {
        struct genl_ops *ops;
-       struct genl_family *family;
        struct net *net = sock_net(skb->sk);
        struct genl_info info;
        struct genlmsghdr *hdr = nlmsg_data(nlh);
+       struct nlattr **attrbuf;
        int hdrlen, err;
 
-       family = genl_family_find_byid(nlh->nlmsg_type);
-       if (family == NULL)
-               return -ENOENT;
-
        /* this family doesn't exist in this netns */
        if (!family->netnsok && !net_eq(net, &init_net))
                return -ENOENT;
@@ -559,26 +586,34 @@ static int genl_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
                return -EPERM;
 
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+                       .dump = ops->dumpit,
+                       .done = ops->done,
+                       .lockless = family->lockless,
+               };
+
                if (ops->dumpit == NULL)
                        return -EOPNOTSUPP;
 
-               genl_unlock();
-               {
-                       struct netlink_dump_control c = {
-                               .dump = ops->dumpit,
-                               .done = ops->done,
-                       };
-                       err = netlink_dump_start(net->genl_sock, skb, nlh, &c);
-               }
-               genl_lock();
+               __genl_unlock(family);
+               err = netlink_dump_start(net->genl_sock, skb, nlh, &c);
+               __genl_lock(family);
                return err;
        }
 
        if (ops->doit == NULL)
                return -EOPNOTSUPP;
 
-       if (family->attrbuf) {
-               err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
+       if (family->maxattr && family->lockless) {
+               attrbuf = kmalloc((family->maxattr+1) *
+                                       sizeof(struct nlattr *), GFP_KERNEL);
+               if (attrbuf == NULL)
+                       return -ENOMEM;
+       } else
+               attrbuf = family->attrbuf;
+
+       if (attrbuf) {
+               err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
                                  ops->policy);
                if (err < 0)
                        return err;
@@ -589,7 +624,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
        info.nlhdr = nlh;
        info.genlhdr = nlmsg_data(nlh);
        info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
-       info.attrs = family->attrbuf;
+       info.attrs = attrbuf;
        genl_info_net_set(&info, net);
        memset(&info.user_ptr, 0, sizeof(info.user_ptr));
 
@@ -604,14 +639,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
        if (family->post_doit)
                family->post_doit(ops, skb, &info);
 
+       if (family->lockless)
+               kfree(attrbuf);
+
+       return err;
+}
+
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       struct genl_family *family;
+       int err;
+
+       family = genl_family_find_byid(nlh->nlmsg_type);
+       if (family == NULL)
+               return -ENOENT;
+
+       __genl_lock(family);
+       err = genl_family_rcv_msg(family, skb, nlh);
+       __genl_unlock(family);
+
        return err;
 }
 
 static void genl_rcv(struct sk_buff *skb)
 {
-       genl_lock();
+       down_read(&cb_lock);
        netlink_rcv_skb(skb, &genl_rcv_msg);
-       genl_unlock();
+       up_read(&cb_lock);
 }
 
 /**************************************************************************
-- 
1.7.10

_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to