Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
On 1/11/18 8:08 AM, Phil Sutter wrote: > On Wed, Jan 10, 2018 at 09:12:45PM +0100, Phil Sutter wrote: >> On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote: >> [...] >>> 2. I am using a batch file with drop filters: >>> >>> filter add dev eth2 ingress protocol ip pref 273 flower dst_ip >>> 192.168.253.0/16 action drop >>> >>> and for each command tc is trying to dlopen m_drop.so: >>> >>> open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such >>> file or directory) >> >> [...] >> >>> Can you look at a follow on patch (not part of this set) to cache status >>> of dlopen attempts? >> >> IMHO the logic used in get_action_kind() for gact is the culprit here: >> After trying to dlopen m_drop.so, it dlopens m_gact.so although it is >> present already. (Unless I missed something.) > > Not quite, m_gact.c is statically compiled in and there is logic around > dlopen(NULL, ...) to prevent calling it twice. > >> I guess the better (and easier) fix would be to create some more struct >> action_util instances in m_gact.c for the primitives it supports so that >> the lookup in action_list succeeds for consecutive uses. Note that >> parse_gact() even supports this already. > > Sadly, this doesn't fly: If a lookup for action 'drop' is successful, > that value is set as TCA_ACT_KIND and the kernel doesn't know about it. > > I came up with an alternative solution, what do you think about attached > patch? Looks ok to me and removes the repeated open overhead. Send it formally and cc Jiri and Jamal. Thanks,
Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
On Wed, Jan 10, 2018 at 09:12:45PM +0100, Phil Sutter wrote: > On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote: > [...] > > 2. I am using a batch file with drop filters: > > > > filter add dev eth2 ingress protocol ip pref 273 flower dst_ip > > 192.168.253.0/16 action drop > > > > and for each command tc is trying to dlopen m_drop.so: > > > > open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such > > file or directory) > > [...] > > > Can you look at a follow on patch (not part of this set) to cache status > > of dlopen attempts? > > IMHO the logic used in get_action_kind() for gact is the culprit here: > After trying to dlopen m_drop.so, it dlopens m_gact.so although it is > present already. (Unless I missed something.) Not quite, m_gact.c is statically compiled in and there is logic around dlopen(NULL, ...) to prevent calling it twice. > I guess the better (and easier) fix would be to create some more struct > action_util instances in m_gact.c for the primitives it supports so that > the lookup in action_list succeeds for consecutive uses. Note that > parse_gact() even supports this already. Sadly, this doesn't fly: If a lookup for action 'drop' is successful, that value is set as TCA_ACT_KIND and the kernel doesn't know about it. I came up with an alternative solution, what do you think about attached patch? Thanks, Phil diff --git a/tc/m_action.c b/tc/m_action.c index fc4223648e8cf..d3df93c066a89 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -194,7 +194,10 @@ int parse_action(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) } else { struct action_util *a = NULL; - strncpy(k, *argv, sizeof(k) - 1); + if (!action_a2n(*argv, NULL, false)) + strncpy(k, "gact", sizeof(k) - 1); + else + strncpy(k, *argv, sizeof(k) - 1); eap = 0; if (argc > 0) { a = get_action_kind(k); diff --git a/tc/tc_util.c b/tc/tc_util.c index ee9a70aa6830c..10e5aa91168a1 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -511,7 +511,7 @@ static const char *action_n2a(int action) * * In error case, returns -1 and does not touch @result. Otherwise returns 0. */ -static int action_a2n(char *arg, int *result, bool allow_num) +int action_a2n(char *arg, int *result, bool allow_num) { int n; char dummy; @@ -535,13 +535,15 @@ static int action_a2n(char *arg, int *result, bool allow_num) for (iter = a2n; iter->a; iter++) { if (matches(arg, iter->a) != 0) continue; - *result = iter->n; - return 0; + n = iter->n; + goto out_ok; } if (!allow_num || sscanf(arg, "%d%c", , ) != 1) return -1; - *result = n; +out_ok: + if (result) + *result = n; return 0; } diff --git a/tc/tc_util.h b/tc/tc_util.h index 1218610d77092..e354765ff1ed0 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -132,4 +132,6 @@ int prio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt); int cls_names_init(char *path); void cls_names_uninit(void); +int action_a2n(char *arg, int *result, bool allow_num); + #endif
RE: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
> -Original Message- > From: David Ahern [mailto:dsah...@gmail.com] > Sent: Thursday, January 11, 2018 3:21 AM > To: Chris Mi <chr...@mellanox.com>; netdev@vger.kernel.org > Cc: gerlitz...@gmail.com; step...@networkplumber.org; > marcelo.leit...@gmail.com; p...@nwl.cc > Subject: Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions > rtnl_talk_msg and rtnl_talk_iov > > On 1/9/18 8:27 PM, Chris Mi wrote: > > rtnl_talk can only send a single message to kernel. Add two functions > > rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel. > > rtnl_talk_msg takes struct msghdr * as argument. > > rtnl_talk_iov takes struct iovec * and iovlen as arguments. > > > > Signed-off-by: Chris Mi <chr...@mellanox.com> > > --- > > include/libnetlink.h | 6 > > lib/libnetlink.c | 82 - > --- > > 2 files changed, 70 insertions(+), 18 deletions(-) > > > > diff --git a/include/libnetlink.h b/include/libnetlink.h index > > a4d83b9e..e9a63dbc 100644 > > --- a/include/libnetlink.h > > +++ b/include/libnetlink.h > > @@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, > > int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, > > struct nlmsghdr **answer) > > __attribute__((warn_unused_result)); > > +int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, > > + struct nlmsghdr **answer) > > + __attribute__((warn_unused_result)); > > As mentioned before rtnl_talk_msg is not needed; you only need to add > rtnl_talk_iov. The attached fixup on top of your patch removes it and adjusts > __rtnl_talk_iov. Please roll that change into your patch. Done. I misunderstood you previous comment. Thanks for your patch, David. > > > While testing this I noticed 2 other oddities: > > $ perf trace -s tc -b tc.batch > (stddev column removed to shorten line width) > > Summary of events: > > tc (780), 1857 events, 97.9% > >syscallcallstotal min avg max >(msec)(msec)(msec)(msec) >--- - - - - >recvmsg 530 6.532 0.008 0.012 0.218 >open 269 5.429 0.012 0.020 0.117 >sendmsg4 3.518 0.092 0.879 1.647 > > > > 1. recvmsg is called twice - once to peek at message size, allocate a buffer > and then really receive the message. That is overkill for ACKs. > > 2. I am using a batch file with drop filters: > > filter add dev eth2 ingress protocol ip pref 273 flower dst_ip > 192.168.253.0/16 action drop > > and for each command tc is trying to dlopen m_drop.so: > > open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No > such file or directory) > > > With a patch to use a stack buffer for ACKs, the above perf summary > becomes: > > $ perf trace -s tc -b tc.batch > > Summary of events: > > tc (777), 1345 events, 97.1% > >syscallcallstotal min avg max >(msec)(msec)(msec)(msec) >--- - - - - >open 269 5.510 0.013 0.020 0.160 >recvmsg 274 3.758 0.009 0.014 0.396 >sendmsg4 3.531 0.098 0.883 1.672 > > > Making the open errors now the dominate overhead affecting performance. > If tc had some smarts that it already tried that file it would avoid the > subsequent open calls. The end result is a significant speed up compared to > the current tc: > > Summary of events: > > tc (785), 2333 events, 98.3% > >syscallcallstotal min avg max >(msec)(msec)(msec)(msec) >--- - - - - >sendmsg 256 9.832 0.029 0.038 0.181 >open 269 5.819 0.013 0.022 0.353 >recvmsg 530 5.592 0.009 0.011 0.285 > > > Can you look at a follow on patch (not part of this set) to cache status of > dlopen attempts? Sure, I will investigate this issue.
Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote: [...] > 2. I am using a batch file with drop filters: > > filter add dev eth2 ingress protocol ip pref 273 flower dst_ip > 192.168.253.0/16 action drop > > and for each command tc is trying to dlopen m_drop.so: > > open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such > file or directory) [...] > Can you look at a follow on patch (not part of this set) to cache status > of dlopen attempts? IMHO the logic used in get_action_kind() for gact is the culprit here: After trying to dlopen m_drop.so, it dlopens m_gact.so although it is present already. (Unless I missed something.) I guess the better (and easier) fix would be to create some more struct action_util instances in m_gact.c for the primitives it supports so that the lookup in action_list succeeds for consecutive uses. Note that parse_gact() even supports this already. Cheers, Phil
Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
On 1/9/18 8:27 PM, Chris Mi wrote: > rtnl_talk can only send a single message to kernel. Add two functions > rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel. > rtnl_talk_msg takes struct msghdr * as argument. > rtnl_talk_iov takes struct iovec * and iovlen as arguments. > > Signed-off-by: Chris Mi> --- > include/libnetlink.h | 6 > lib/libnetlink.c | 82 > > 2 files changed, 70 insertions(+), 18 deletions(-) > > diff --git a/include/libnetlink.h b/include/libnetlink.h > index a4d83b9e..e9a63dbc 100644 > --- a/include/libnetlink.h > +++ b/include/libnetlink.h > @@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, > int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, > struct nlmsghdr **answer) > __attribute__((warn_unused_result)); > +int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, > + struct nlmsghdr **answer) > + __attribute__((warn_unused_result)); As mentioned before rtnl_talk_msg is not needed; you only need to add rtnl_talk_iov. The attached fixup on top of your patch removes it and adjusts __rtnl_talk_iov. Please roll that change into your patch. While testing this I noticed 2 other oddities: $ perf trace -s tc -b tc.batch (stddev column removed to shorten line width) Summary of events: tc (780), 1857 events, 97.9% syscallcallstotal min avg max (msec)(msec)(msec)(msec) --- - - - - recvmsg 530 6.532 0.008 0.012 0.218 open 269 5.429 0.012 0.020 0.117 sendmsg4 3.518 0.092 0.879 1.647 1. recvmsg is called twice - once to peek at message size, allocate a buffer and then really receive the message. That is overkill for ACKs. 2. I am using a batch file with drop filters: filter add dev eth2 ingress protocol ip pref 273 flower dst_ip 192.168.253.0/16 action drop and for each command tc is trying to dlopen m_drop.so: open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) With a patch to use a stack buffer for ACKs, the above perf summary becomes: $ perf trace -s tc -b tc.batch Summary of events: tc (777), 1345 events, 97.1% syscallcallstotal min avg max (msec)(msec)(msec)(msec) --- - - - - open 269 5.510 0.013 0.020 0.160 recvmsg 274 3.758 0.009 0.014 0.396 sendmsg4 3.531 0.098 0.883 1.672 Making the open errors now the dominate overhead affecting performance. If tc had some smarts that it already tried that file it would avoid the subsequent open calls. The end result is a significant speed up compared to the current tc: Summary of events: tc (785), 2333 events, 98.3% syscallcallstotal min avg max (msec)(msec)(msec)(msec) --- - - - - sendmsg 256 9.832 0.029 0.038 0.181 open 269 5.819 0.013 0.022 0.353 recvmsg 530 5.592 0.009 0.011 0.285 Can you look at a follow on patch (not part of this set) to cache status of dlopen attempts? diff --git a/include/libnetlink.h b/include/libnetlink.h index e9a63dbca259..d6322190afaa 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -96,9 +96,6 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, struct nlmsghdr **answer) __attribute__((warn_unused_result)); -int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, - struct nlmsghdr **answer) - __attribute__((warn_unused_result)); int rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iovec, size_t iovlen, struct nlmsghdr **answer) __attribute__((warn_unused_result)); diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 183825c7fe0e..cb1990fcbf1c 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -581,38 +581,40 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err, strerror(-err->error)); } -static int __rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, - struct nlmsghdr **answer, + +static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov, + size_t iovlen, struct nlmsghdr **answer, bool show_rtnl_err, nl_ext_ack_fn_t errfn) { struct sockaddr_nl nladdr = { .nl_family =
[patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov
rtnl_talk can only send a single message to kernel. Add two functions rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel. rtnl_talk_msg takes struct msghdr * as argument. rtnl_talk_iov takes struct iovec * and iovlen as arguments. Signed-off-by: Chris Mi--- include/libnetlink.h | 6 lib/libnetlink.c | 82 2 files changed, 70 insertions(+), 18 deletions(-) diff --git a/include/libnetlink.h b/include/libnetlink.h index a4d83b9e..e9a63dbc 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, struct nlmsghdr **answer) __attribute__((warn_unused_result)); +int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, + struct nlmsghdr **answer) + __attribute__((warn_unused_result)); +int rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iovec, size_t iovlen, + struct nlmsghdr **answer) + __attribute__((warn_unused_result)); int rtnl_talk_extack(struct rtnl_handle *rtnl, struct nlmsghdr *n, struct nlmsghdr **answer, nl_ext_ack_fn_t errfn) __attribute__((warn_unused_result)); diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 00e6ce0c..183825c7 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -581,39 +581,43 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err, strerror(-err->error)); } -static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, - struct nlmsghdr **answer, - bool show_rtnl_err, nl_ext_ack_fn_t errfn) +static int __rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m, + struct nlmsghdr **answer, + bool show_rtnl_err, nl_ext_ack_fn_t errfn) { - int status; - unsigned int seq; - struct nlmsghdr *h; struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; - struct iovec iov = { - .iov_base = n, - .iov_len = n->nlmsg_len - }; + int i, status, iovlen = m->msg_iovlen; + struct iovec iov; struct msghdr msg = { .msg_name = , .msg_namelen = sizeof(nladdr), .msg_iov = , .msg_iovlen = 1, }; + unsigned int seq = 0; + struct nlmsghdr *h; char *buf; - n->nlmsg_seq = seq = ++rtnl->seq; - - if (answer == NULL) - n->nlmsg_flags |= NLM_F_ACK; + for (i = 0; i < iovlen; i++) { + struct iovec *v; + v = >msg_iov[i]; + h = v->iov_base; + h->nlmsg_seq = seq = ++rtnl->seq; + if (answer == NULL) + h->nlmsg_flags |= NLM_F_ACK; + } - status = sendmsg(rtnl->fd, , 0); + status = sendmsg(rtnl->fd, m, 0); if (status < 0) { perror("Cannot talk to rtnetlink"); return -1; } + i = 0; while (1) { +next: status = rtnl_recvmsg(rtnl->fd, , ); + ++i; if (status < 0) return status; @@ -642,7 +646,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, if (nladdr.nl_pid != 0 || h->nlmsg_pid != rtnl->local.nl_pid || - h->nlmsg_seq != seq) { + h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) { /* Don't forget to skip that message. */ status -= NLMSG_ALIGN(len); h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); @@ -662,7 +666,10 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, *answer = (struct nlmsghdr *)buf; else free(buf); - return 0; + if (h->nlmsg_seq == seq) + return 0; + else + goto next; } if (rtnl->proto != NETLINK_SOCK_DIAG && @@ -671,7 +678,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, errno = -err->error; free(buf); - return -1; + return -i; } if (answer) { @@ -698,12 +705,51 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct