All genl callbacks are serialized by genl-mutex. This can become bottleneck in multi threaded case. Following patch adds an parameter to genl_family so that a particular family can get concurrent netlink callback without genl_lock held.
As side effect this patch also fixes race between genl netlink dump and genl_unregister. Signed-off-by: Pravin B Shelar <[email protected]> --- include/linux/netlink.h | 2 + include/net/genetlink.h | 1 + net/netlink/af_netlink.c | 48 ++++++++++++++++++----- net/netlink/genetlink.c | 96 ++++++++++++++++++++++++++++++++++++---------- 4 files changed, 117 insertions(+), 30 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 0f628ff..5e2df32 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -230,6 +230,7 @@ struct netlink_callback { u16 min_dump_alloc; unsigned int prev_seq, seq; long args[6]; + bool lockless; }; struct netlink_notify { @@ -254,6 +255,7 @@ struct netlink_dump_control { int (*done)(struct netlink_callback*); void *data; u16 min_dump_alloc; + bool lockless; }; extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ccb6888..135a3a4 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -48,6 +48,7 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + bool lockless; int (*pre_doit)(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b3025a6..c7311f4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -78,6 +78,7 @@ struct netlink_sock { wait_queue_head_t wait; struct netlink_callback *cb; struct mutex *cb_mutex; + struct mutex sock_cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); struct module *module; @@ -431,6 +432,7 @@ static int __netlink_create(struct net *net, struct socket *sock, nlk->cb_mutex = &nlk->cb_def_mutex; mutex_init(nlk->cb_mutex); } + mutex_init(&nlk->sock_cb_mutex); init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -1678,6 +1680,28 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) } EXPORT_SYMBOL(__nlmsg_put); +static void netlink_cb_lock(struct netlink_sock *nlk, struct netlink_callback *cb) +{ + if (!cb->lockless) + mutex_lock(nlk->cb_mutex); +} + +static void netlink_cb_unlock(struct netlink_sock *nlk, struct netlink_callback *cb) +{ + if (!cb->lockless) + mutex_unlock(nlk->cb_mutex); +} + +static void netlink_sock_lock(struct netlink_sock *nlk) +{ + mutex_lock(&nlk->sock_cb_mutex); +} + +static void netlink_sock_unlock(struct netlink_sock *nlk) +{ + mutex_unlock(&nlk->sock_cb_mutex); +} + /* * It looks a bit ugly. * It would be better to create kernel thread. @@ -1692,8 +1716,7 @@ static int netlink_dump(struct sock *sk) int len, err = -ENOBUFS; int alloc_size; - mutex_lock(nlk->cb_mutex); - + netlink_sock_lock(nlk); cb = nlk->cb; if (cb == NULL) { err = -EINVAL; @@ -1706,10 +1729,13 @@ static int netlink_dump(struct sock *sk) if (!skb) goto errout_skb; + /* Take family lock if required. */ + netlink_cb_lock(nlk, cb); len = cb->dump(skb, cb); if (len > 0) { - mutex_unlock(nlk->cb_mutex); + netlink_cb_unlock(nlk, cb); + netlink_sock_unlock(nlk); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -1720,7 +1746,7 @@ static int netlink_dump(struct sock *sk) nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); if (!nlh) - goto errout_skb; + goto errout_unlock_cb; nl_dump_check_consistent(cb, nlh); @@ -1734,13 +1760,16 @@ static int netlink_dump(struct sock *sk) if (cb->done) cb->done(cb); nlk->cb = NULL; - mutex_unlock(nlk->cb_mutex); + netlink_cb_unlock(nlk, cb); + netlink_sock_unlock(nlk); netlink_consume_callback(cb); return 0; +errout_unlock_cb: + netlink_cb_unlock(nlk, cb); errout_skb: - mutex_unlock(nlk->cb_mutex); + netlink_sock_unlock(nlk); kfree_skb(skb); return err; } @@ -1773,15 +1802,16 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk = nlk_sk(sk); /* A dump is in progress... */ - mutex_lock(nlk->cb_mutex); + netlink_sock_lock(nlk); if (nlk->cb) { - mutex_unlock(nlk->cb_mutex); + netlink_sock_unlock(nlk); netlink_destroy_callback(cb); sock_put(sk); return -EBUSY; } nlk->cb = cb; - mutex_unlock(nlk->cb_mutex); + nlk->cb->lockless = control->lockless; + netlink_sock_unlock(nlk); ret = netlink_dump(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2cc7c1e..720f5b5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -16,10 +16,12 @@ #include <linux/skbuff.h> #include <linux/mutex.h> #include <linux/bitmap.h> +#include <linux/rwsem.h> #include <net/sock.h> #include <net/genetlink.h> static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ +static DECLARE_RWSEM(cb_lock); void genl_lock(void) { @@ -41,6 +43,18 @@ int lockdep_genl_is_held(void) EXPORT_SYMBOL(lockdep_genl_is_held); #endif +static void __genl_lock(struct genl_family *family) +{ + if (!family->lockless) + mutex_lock(&genl_mutex); +} + +static void __genl_unlock(struct genl_family *family) +{ + if (!family->lockless) + mutex_unlock(&genl_mutex); +} + #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) @@ -143,6 +157,7 @@ int genl_register_mc_group(struct genl_family *family, BUG_ON(grp->name[0] == '\0'); + down_write(&cb_lock); genl_lock(); /* special-case our own group */ @@ -213,6 +228,7 @@ int genl_register_mc_group(struct genl_family *family, genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp); out: genl_unlock(); + up_write(&cb_lock); return err; } EXPORT_SYMBOL(genl_register_mc_group); @@ -254,9 +270,11 @@ static void __genl_unregister_mc_group(struct genl_family *family, void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp) { + down_write(&cb_lock); genl_lock(); __genl_unregister_mc_group(family, grp); genl_unlock(); + up_write(&cb_lock); } EXPORT_SYMBOL(genl_unregister_mc_group); @@ -302,9 +320,11 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) if (ops->policy) ops->flags |= GENL_CMD_CAP_HASPOL; + down_write(&cb_lock); genl_lock(); list_add_tail(&ops->ops_list, &family->ops_list); genl_unlock(); + up_write(&cb_lock); genl_ctrl_event(CTRL_CMD_NEWOPS, ops); err = 0; @@ -333,16 +353,19 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) { struct genl_ops *rc; + down_write(&cb_lock); genl_lock(); list_for_each_entry(rc, &family->ops_list, ops_list) { if (rc == ops) { list_del(&ops->ops_list); genl_unlock(); + up_write(&cb_lock); genl_ctrl_event(CTRL_CMD_DELOPS, ops); return 0; } } genl_unlock(); + up_write(&cb_lock); return -ENOENT; } @@ -372,6 +395,7 @@ int genl_register_family(struct genl_family *family) INIT_LIST_HEAD(&family->ops_list); INIT_LIST_HEAD(&family->mcast_groups); + down_write(&cb_lock); genl_lock(); if (genl_family_find_byname(family->name)) { @@ -393,7 +417,7 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->maxattr) { + if (family->maxattr && !family->lockless) { family->attrbuf = kmalloc((family->maxattr+1) * sizeof(struct nlattr *), GFP_KERNEL); if (family->attrbuf == NULL) { @@ -405,6 +429,7 @@ int genl_register_family(struct genl_family *family) list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock(); + up_write(&cb_lock); genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); @@ -412,6 +437,7 @@ int genl_register_family(struct genl_family *family) errout_locked: genl_unlock(); + up_write(&cb_lock); errout: return err; } @@ -475,6 +501,7 @@ int genl_unregister_family(struct genl_family *family) { struct genl_family *rc; + down_write(&cb_lock); genl_lock(); genl_unregister_mc_groups(family); @@ -486,6 +513,7 @@ int genl_unregister_family(struct genl_family *family) list_del(&rc->family_list); INIT_LIST_HEAD(&family->ops_list); genl_unlock(); + up_write(&cb_lock); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family); @@ -493,6 +521,7 @@ int genl_unregister_family(struct genl_family *family) } genl_unlock(); + up_write(&cb_lock); return -ENOENT; } @@ -529,19 +558,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_family_rcv_msg(struct genl_family *family, + struct sk_buff *skb, + struct nlmsghdr *nlh) { struct genl_ops *ops; - struct genl_family *family; struct net *net = sock_net(skb->sk); struct genl_info info; struct genlmsghdr *hdr = nlmsg_data(nlh); + struct nlattr **attrbuf; int hdrlen, err; - family = genl_family_find_byid(nlh->nlmsg_type); - if (family == NULL) - return -ENOENT; - /* this family doesn't exist in this netns */ if (!family->netnsok && !net_eq(net, &init_net)) return -ENOENT; @@ -559,26 +586,34 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + .lockless = family->lockless, + }; + if (ops->dumpit == NULL) return -EOPNOTSUPP; - genl_unlock(); - { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; - err = netlink_dump_start(net->genl_sock, skb, nlh, &c); - } - genl_lock(); + __genl_unlock(family); + err = netlink_dump_start(net->genl_sock, skb, nlh, &c); + __genl_lock(family); return err; } if (ops->doit == NULL) return -EOPNOTSUPP; - if (family->attrbuf) { - err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, + if (family->maxattr && family->lockless) { + attrbuf = kmalloc((family->maxattr+1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (attrbuf == NULL) + return -ENOMEM; + } else + attrbuf = family->attrbuf; + + if (attrbuf) { + err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, ops->policy); if (err < 0) return err; @@ -589,7 +624,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; - info.attrs = family->attrbuf; + info.attrs = attrbuf; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -604,14 +639,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (family->post_doit) family->post_doit(ops, skb, &info); + if (family->lockless) + kfree(attrbuf); + + return err; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct genl_family *family; + int err; + + family = genl_family_find_byid(nlh->nlmsg_type); + if (family == NULL) + return -ENOENT; + + __genl_lock(family); + err = genl_family_rcv_msg(family, skb, nlh); + __genl_unlock(family); + return err; } static void genl_rcv(struct sk_buff *skb) { - genl_lock(); + down_read(&cb_lock); netlink_rcv_skb(skb, &genl_rcv_msg); - genl_unlock(); + up_read(&cb_lock); } /************************************************************************** -- 1.7.10 _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
