Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-11 Thread David Ahern
On 1/11/18 8:08 AM, Phil Sutter wrote:
> On Wed, Jan 10, 2018 at 09:12:45PM +0100, Phil Sutter wrote:
>> On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote:
>> [...]
>>> 2. I am using a batch file with drop filters:
>>>
>>> filter add dev eth2 ingress protocol ip pref 273 flower dst_ip
>>> 192.168.253.0/16 action drop
>>>
>>> and for each command tc is trying to dlopen m_drop.so:
>>>
>>> open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such
>>> file or directory)
>>
>> [...]
>>
>>> Can you look at a follow on patch (not part of this set) to cache status
>>> of dlopen attempts?
>>
>> IMHO the logic used in get_action_kind() for gact is the culprit here:
>> After trying to dlopen m_drop.so, it dlopens m_gact.so although it is
>> present already. (Unless I missed something.)
> 
> Not quite, m_gact.c is statically compiled in and there is logic around
> dlopen(NULL, ...) to prevent calling it twice.
> 
>> I guess the better (and easier) fix would be to create some more struct
>> action_util instances in m_gact.c for the primitives it supports so that
>> the lookup in action_list succeeds for consecutive uses. Note that
>> parse_gact() even supports this already.
> 
> Sadly, this doesn't fly: If a lookup for action 'drop' is successful,
> that value is set as TCA_ACT_KIND and the kernel doesn't know about it.
> 
> I came up with an alternative solution, what do you think about attached
> patch?

Looks ok to me and removes the repeated open overhead. Send it formally
and cc Jiri and Jamal.

Thanks,


Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-11 Thread Phil Sutter
On Wed, Jan 10, 2018 at 09:12:45PM +0100, Phil Sutter wrote:
> On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote:
> [...]
> > 2. I am using a batch file with drop filters:
> > 
> > filter add dev eth2 ingress protocol ip pref 273 flower dst_ip
> > 192.168.253.0/16 action drop
> > 
> > and for each command tc is trying to dlopen m_drop.so:
> > 
> > open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such
> > file or directory)
> 
> [...]
> 
> > Can you look at a follow on patch (not part of this set) to cache status
> > of dlopen attempts?
> 
> IMHO the logic used in get_action_kind() for gact is the culprit here:
> After trying to dlopen m_drop.so, it dlopens m_gact.so although it is
> present already. (Unless I missed something.)

Not quite, m_gact.c is statically compiled in and there is logic around
dlopen(NULL, ...) to prevent calling it twice.

> I guess the better (and easier) fix would be to create some more struct
> action_util instances in m_gact.c for the primitives it supports so that
> the lookup in action_list succeeds for consecutive uses. Note that
> parse_gact() even supports this already.

Sadly, this doesn't fly: If a lookup for action 'drop' is successful,
that value is set as TCA_ACT_KIND and the kernel doesn't know about it.

I came up with an alternative solution, what do you think about attached
patch?

Thanks, Phil
diff --git a/tc/m_action.c b/tc/m_action.c
index fc4223648e8cf..d3df93c066a89 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -194,7 +194,10 @@ int parse_action(int *argc_p, char ***argv_p, int tca_id, 
struct nlmsghdr *n)
} else {
struct action_util *a = NULL;
 
-   strncpy(k, *argv, sizeof(k) - 1);
+   if (!action_a2n(*argv, NULL, false))
+   strncpy(k, "gact", sizeof(k) - 1);
+   else
+   strncpy(k, *argv, sizeof(k) - 1);
eap = 0;
if (argc > 0) {
a = get_action_kind(k);
diff --git a/tc/tc_util.c b/tc/tc_util.c
index ee9a70aa6830c..10e5aa91168a1 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -511,7 +511,7 @@ static const char *action_n2a(int action)
  *
  * In error case, returns -1 and does not touch @result. Otherwise returns 0.
  */
-static int action_a2n(char *arg, int *result, bool allow_num)
+int action_a2n(char *arg, int *result, bool allow_num)
 {
int n;
char dummy;
@@ -535,13 +535,15 @@ static int action_a2n(char *arg, int *result, bool 
allow_num)
for (iter = a2n; iter->a; iter++) {
if (matches(arg, iter->a) != 0)
continue;
-   *result = iter->n;
-   return 0;
+   n = iter->n;
+   goto out_ok;
}
if (!allow_num || sscanf(arg, "%d%c", , ) != 1)
return -1;
 
-   *result = n;
+out_ok:
+   if (result)
+   *result = n;
return 0;
 }
 
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 1218610d77092..e354765ff1ed0 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -132,4 +132,6 @@ int prio_print_opt(struct qdisc_util *qu, FILE *f, struct 
rtattr *opt);
 int cls_names_init(char *path);
 void cls_names_uninit(void);
 
+int action_a2n(char *arg, int *result, bool allow_num);
+
 #endif


RE: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-10 Thread Chris Mi
> -Original Message-
> From: David Ahern [mailto:dsah...@gmail.com]
> Sent: Thursday, January 11, 2018 3:21 AM
> To: Chris Mi <chr...@mellanox.com>; netdev@vger.kernel.org
> Cc: gerlitz...@gmail.com; step...@networkplumber.org;
> marcelo.leit...@gmail.com; p...@nwl.cc
> Subject: Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions
> rtnl_talk_msg and rtnl_talk_iov
> 
> On 1/9/18 8:27 PM, Chris Mi wrote:
> > rtnl_talk can only send a single message to kernel. Add two functions
> > rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel.
> > rtnl_talk_msg takes struct msghdr * as argument.
> > rtnl_talk_iov takes struct iovec * and iovlen as arguments.
> >
> > Signed-off-by: Chris Mi <chr...@mellanox.com>
> > ---
> >  include/libnetlink.h |  6 
> >  lib/libnetlink.c | 82 -
> ---
> >  2 files changed, 70 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/libnetlink.h b/include/libnetlink.h index
> > a4d83b9e..e9a63dbc 100644
> > --- a/include/libnetlink.h
> > +++ b/include/libnetlink.h
> > @@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
> > int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> >   struct nlmsghdr **answer)
> > __attribute__((warn_unused_result));
> > +int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
> > + struct nlmsghdr **answer)
> > +   __attribute__((warn_unused_result));
> 
> As mentioned before rtnl_talk_msg is not needed; you only need to add
> rtnl_talk_iov. The attached fixup on top of your patch removes it and adjusts
> __rtnl_talk_iov. Please roll that change into your patch.
Done. I misunderstood you previous comment.
Thanks for your patch, David.
> 
> 
> While testing this I noticed 2 other oddities:
> 
> $ perf trace -s tc -b tc.batch
> (stddev column removed to shorten line width)
> 
>  Summary of events:
> 
>  tc (780), 1857 events, 97.9%
> 
>syscallcallstotal   min   avg   max
>(msec)(msec)(msec)(msec)
>---  - - - -
>recvmsg  530 6.532 0.008 0.012 0.218
>open 269 5.429 0.012 0.020 0.117
>sendmsg4 3.518 0.092 0.879 1.647
> 
> 
> 
> 1. recvmsg is called twice - once to peek at message size, allocate a buffer
> and then really receive the message. That is overkill for ACKs.
> 
> 2. I am using a batch file with drop filters:
> 
> filter add dev eth2 ingress protocol ip pref 273 flower dst_ip
> 192.168.253.0/16 action drop
> 
> and for each command tc is trying to dlopen m_drop.so:
> 
> open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No
> such file or directory)
> 
> 
> With a patch to use a stack buffer for ACKs, the above perf summary
> becomes:
> 
> $ perf trace -s tc -b tc.batch
> 
>  Summary of events:
> 
>  tc (777), 1345 events, 97.1%
> 
>syscallcallstotal   min   avg   max
>(msec)(msec)(msec)(msec)
>---  - - - -
>open 269 5.510 0.013 0.020 0.160
>recvmsg  274 3.758 0.009 0.014 0.396
>sendmsg4 3.531 0.098 0.883 1.672
> 
> 
> Making the open errors now the dominate overhead affecting performance.
> If tc had some smarts that it already tried that file it would avoid the
> subsequent open calls. The end result is a significant speed up compared to
> the current tc:
> 
>  Summary of events:
> 
>  tc (785), 2333 events, 98.3%
> 
>syscallcallstotal   min   avg   max
>(msec)(msec)(msec)(msec)
>---  - - - -
>sendmsg  256 9.832 0.029 0.038 0.181
>open 269 5.819 0.013 0.022 0.353
>recvmsg  530 5.592 0.009 0.011 0.285
> 
> 
> Can you look at a follow on patch (not part of this set) to cache status of
> dlopen attempts?
Sure, I will investigate this issue.


Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-10 Thread Phil Sutter
On Wed, Jan 10, 2018 at 12:20:36PM -0700, David Ahern wrote:
[...]
> 2. I am using a batch file with drop filters:
> 
> filter add dev eth2 ingress protocol ip pref 273 flower dst_ip
> 192.168.253.0/16 action drop
> 
> and for each command tc is trying to dlopen m_drop.so:
> 
> open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such
> file or directory)

[...]

> Can you look at a follow on patch (not part of this set) to cache status
> of dlopen attempts?

IMHO the logic used in get_action_kind() for gact is the culprit here:
After trying to dlopen m_drop.so, it dlopens m_gact.so although it is
present already. (Unless I missed something.)

I guess the better (and easier) fix would be to create some more struct
action_util instances in m_gact.c for the primitives it supports so that
the lookup in action_list succeeds for consecutive uses. Note that
parse_gact() even supports this already.

Cheers, Phil


Re: [patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-10 Thread David Ahern
On 1/9/18 8:27 PM, Chris Mi wrote:
> rtnl_talk can only send a single message to kernel. Add two functions
> rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel.
> rtnl_talk_msg takes struct msghdr * as argument.
> rtnl_talk_iov takes struct iovec * and iovlen as arguments.
> 
> Signed-off-by: Chris Mi 
> ---
>  include/libnetlink.h |  6 
>  lib/libnetlink.c | 82 
> 
>  2 files changed, 70 insertions(+), 18 deletions(-)
> 
> diff --git a/include/libnetlink.h b/include/libnetlink.h
> index a4d83b9e..e9a63dbc 100644
> --- a/include/libnetlink.h
> +++ b/include/libnetlink.h
> @@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
>  int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> struct nlmsghdr **answer)
>   __attribute__((warn_unused_result));
> +int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
> +   struct nlmsghdr **answer)
> + __attribute__((warn_unused_result));

As mentioned before rtnl_talk_msg is not needed; you only need to add
rtnl_talk_iov. The attached fixup on top of your patch removes it and
adjusts __rtnl_talk_iov. Please roll that change into your patch.


While testing this I noticed 2 other oddities:

$ perf trace -s tc -b tc.batch
(stddev column removed to shorten line width)

 Summary of events:

 tc (780), 1857 events, 97.9%

   syscallcallstotal   min   avg   max
   (msec)(msec)(msec)(msec)
   ---  - - - -
   recvmsg  530 6.532 0.008 0.012 0.218
   open 269 5.429 0.012 0.020 0.117
   sendmsg4 3.518 0.092 0.879 1.647



1. recvmsg is called twice - once to peek at message size, allocate a
buffer and then really receive the message. That is overkill for ACKs.

2. I am using a batch file with drop filters:

filter add dev eth2 ingress protocol ip pref 273 flower dst_ip
192.168.253.0/16 action drop

and for each command tc is trying to dlopen m_drop.so:

open("/usr/lib/tc//m_drop.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such
file or directory)


With a patch to use a stack buffer for ACKs, the above perf summary becomes:

$ perf trace -s tc -b tc.batch

 Summary of events:

 tc (777), 1345 events, 97.1%

   syscallcallstotal   min   avg   max
   (msec)(msec)(msec)(msec)
   ---  - - - -
   open 269 5.510 0.013 0.020 0.160
   recvmsg  274 3.758 0.009 0.014 0.396
   sendmsg4 3.531 0.098 0.883 1.672


Making the open errors now the dominate overhead affecting performance.
If tc had some smarts that it already tried that file it would avoid the
subsequent open calls. The end result is a significant speed up compared
to the current tc:

 Summary of events:

 tc (785), 2333 events, 98.3%

   syscallcallstotal   min   avg   max
   (msec)(msec)(msec)(msec)
   ---  - - - -
   sendmsg  256 9.832 0.029 0.038 0.181
   open 269 5.819 0.013 0.022 0.353
   recvmsg  530 5.592 0.009 0.011 0.285


Can you look at a follow on patch (not part of this set) to cache status
of dlopen attempts?
diff --git a/include/libnetlink.h b/include/libnetlink.h
index e9a63dbca259..d6322190afaa 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -96,9 +96,6 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
 int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
  struct nlmsghdr **answer)
__attribute__((warn_unused_result));
-int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
- struct nlmsghdr **answer)
-   __attribute__((warn_unused_result));
 int rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iovec, size_t iovlen,
  struct nlmsghdr **answer)
__attribute__((warn_unused_result));
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 183825c7fe0e..cb1990fcbf1c 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -581,38 +581,40 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct 
nlmsgerr *err,
strerror(-err->error));
 }
 
-static int __rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
-  struct nlmsghdr **answer,
+
+static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
+  size_t iovlen, struct nlmsghdr **answer,
   bool show_rtnl_err, nl_ext_ack_fn_t errfn)
 {
struct sockaddr_nl nladdr = { .nl_family = 

[patch iproute2 v8 1/2] lib/libnetlink: Add functions rtnl_talk_msg and rtnl_talk_iov

2018-01-09 Thread Chris Mi
rtnl_talk can only send a single message to kernel. Add two functions
rtnl_talk_msg and rtnl_talk_iov that can send multiple messages to kernel.
rtnl_talk_msg takes struct msghdr * as argument.
rtnl_talk_iov takes struct iovec * and iovlen as arguments.

Signed-off-by: Chris Mi 
---
 include/libnetlink.h |  6 
 lib/libnetlink.c | 82 
 2 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/include/libnetlink.h b/include/libnetlink.h
index a4d83b9e..e9a63dbc 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -96,6 +96,12 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
 int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
  struct nlmsghdr **answer)
__attribute__((warn_unused_result));
+int rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
+ struct nlmsghdr **answer)
+   __attribute__((warn_unused_result));
+int rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iovec, size_t iovlen,
+ struct nlmsghdr **answer)
+   __attribute__((warn_unused_result));
 int rtnl_talk_extack(struct rtnl_handle *rtnl, struct nlmsghdr *n,
  struct nlmsghdr **answer, nl_ext_ack_fn_t errfn)
__attribute__((warn_unused_result));
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 00e6ce0c..183825c7 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -581,39 +581,43 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct 
nlmsgerr *err,
strerror(-err->error));
 }
 
-static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
-  struct nlmsghdr **answer,
-  bool show_rtnl_err, nl_ext_ack_fn_t errfn)
+static int __rtnl_talk_msg(struct rtnl_handle *rtnl, struct msghdr *m,
+  struct nlmsghdr **answer,
+  bool show_rtnl_err, nl_ext_ack_fn_t errfn)
 {
-   int status;
-   unsigned int seq;
-   struct nlmsghdr *h;
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
-   struct iovec iov = {
-   .iov_base = n,
-   .iov_len = n->nlmsg_len
-   };
+   int i, status, iovlen = m->msg_iovlen;
+   struct iovec iov;
struct msghdr msg = {
.msg_name = ,
.msg_namelen = sizeof(nladdr),
.msg_iov = ,
.msg_iovlen = 1,
};
+   unsigned int seq = 0;
+   struct nlmsghdr *h;
char *buf;
 
-   n->nlmsg_seq = seq = ++rtnl->seq;
-
-   if (answer == NULL)
-   n->nlmsg_flags |= NLM_F_ACK;
+   for (i = 0; i < iovlen; i++) {
+   struct iovec *v;
+   v = >msg_iov[i];
+   h = v->iov_base;
+   h->nlmsg_seq = seq = ++rtnl->seq;
+   if (answer == NULL)
+   h->nlmsg_flags |= NLM_F_ACK;
+   }
 
-   status = sendmsg(rtnl->fd, , 0);
+   status = sendmsg(rtnl->fd, m, 0);
if (status < 0) {
perror("Cannot talk to rtnetlink");
return -1;
}
 
+   i = 0;
while (1) {
+next:
status = rtnl_recvmsg(rtnl->fd, , );
+   ++i;
 
if (status < 0)
return status;
@@ -642,7 +646,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct 
nlmsghdr *n,
 
if (nladdr.nl_pid != 0 ||
h->nlmsg_pid != rtnl->local.nl_pid ||
-   h->nlmsg_seq != seq) {
+   h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) {
/* Don't forget to skip that message. */
status -= NLMSG_ALIGN(len);
h = (struct nlmsghdr *)((char *)h + 
NLMSG_ALIGN(len));
@@ -662,7 +666,10 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct 
nlmsghdr *n,
*answer = (struct nlmsghdr 
*)buf;
else
free(buf);
-   return 0;
+   if (h->nlmsg_seq == seq)
+   return 0;
+   else
+   goto next;
}
 
if (rtnl->proto != NETLINK_SOCK_DIAG &&
@@ -671,7 +678,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct 
nlmsghdr *n,
 
errno = -err->error;
free(buf);
-   return -1;
+   return -i;
}
 
if (answer) {
@@ -698,12 +705,51 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct