Re: [ovs-dev] OVS Soft Freeze date for 2.14 release

2020-07-02 Thread Simon Horman
Hi Maximets,

On Thu, Jul 02, 2020 at 08:28:21PM +0200, Ilya Maximets wrote:
> Hi Ben,
> 
> We reached the "soft freeze" date yesterday. So, we probably need
> to make announcement on a mailing list that soft freeze is started.
> 
> And we need to announce a "feature freeze" (branching) targeted date.
> I think it might be good to announce today or tomorrow and set the
> branch creation date to July 17th to round up the week as we did for
> the last release. 
> 
> What do you think?  Will you take care of this?
> 
> Alternatively, If you don't have much time, I could take care of any
> part of the release process.  Or all of it.
> 
> Regarding exceptions.  For the patches that, I think, should be in
> current release, but missed the "soft freeze" date: we're waiting
> for patches to recommend new DPDK stable releases to use with OVS.
> I guess, these patches should be sent soon so we could accept them
> before branching.

FWIIW, I think that is a reasonable exception.
Perhaps it could included in any soft-freeze announcement.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 ovn 4/5] ovn-northd: Refactor NAT address parsing.

2020-07-02 Thread Han Zhou
On Thu, Jul 2, 2020 at 7:53 AM Dumitru Ceara  wrote:
>
> Store NAT entries pointers in ovn_datapath and pre-parse the external IP
> addresses. This simplifies the code and makes it easier to reuse the
parsed
> external IP and solicited-node address without reparsing.
>
> Signed-off-by: Dumitru Ceara 
> ---
>  northd/ovn-northd.c |  115
++-
>  1 file changed, 85 insertions(+), 30 deletions(-)
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index e270596..a1ba56f 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -605,6 +605,9 @@ struct ovn_datapath {
>   * the "redirect-chassis". */
>  struct ovn_port *l3redirect_port;
>
> +/* NAT entries configured on the router. */
> +struct ovn_nat *nat_entries;
> +
>  struct ovn_port **localnet_ports;
>  size_t n_localnet_ports;
>
> @@ -617,6 +620,65 @@ struct ovn_datapath {
>  struct hmap nb_pgs;
>  };
>
> +/* Contains a NAT entry with the external addresses pre-parsed. */
> +struct ovn_nat {
> +const struct nbrec_nat *nb;
> +struct lport_addresses ext_addrs;
> +};
> +
> +/* Returns true if a 'nat_entry' is valid, i.e.:
> + * - parsing was successful.
> + * - the string yielded exactly one IPv4 address or exactly one IPv6
address.
> + */
> +static bool nat_entry_is_valid(const struct ovn_nat *nat_entry)
> +{
> +const struct lport_addresses *ext_addrs = _entry->ext_addrs;
> +
> +return (ext_addrs->n_ipv4_addrs == 1 && ext_addrs->n_ipv6_addrs ==
0) ||
> +(ext_addrs->n_ipv4_addrs == 0 && ext_addrs->n_ipv6_addrs == 1);
> +}
> +
> +static bool nat_entry_is_v6(const struct ovn_nat *nat_entry)
> +{
> +return nat_entry->ext_addrs.n_ipv6_addrs > 0;
> +}
> +
> +static void init_nat_entries(struct ovn_datapath *od)
> +{
> +if (!od->nbr || od->nbr->n_nat == 0) {
> +return;
> +}
> +
> +od->nat_entries = xmalloc(od->nbr->n_nat * sizeof *od->nat_entries);
> +
> +for (size_t i = 0; i < od->nbr->n_nat; i++) {
> +const struct nbrec_nat *nat = od->nbr->nat[i];
> +struct ovn_nat *nat_entry = >nat_entries[i];
> +
> +nat_entry->nb = nat;
> +if (!extract_ip_addresses(nat->external_ip,
> +  _entry->ext_addrs) ||
> +!nat_entry_is_valid(nat_entry)) {
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
1);
> +
> +VLOG_WARN_RL(,
> + "Bad ip address %s in nat configuration "
> + "for router %s", nat->external_ip,
od->nbr->name);
> +}
> +}
> +}
> +
> +static void destroy_nat_entries(struct ovn_datapath *od)
> +{
> +if (!od->nbr) {
> +return;
> +}
> +
> +for (size_t i = 0; i < od->nbr->n_nat; i++) {
> +destroy_lport_addresses(>nat_entries[i].ext_addrs);
> +}
> +}
> +
>  /* A group of logical router datapaths which are connected - either
>   * directly or indirectly.
>   * Each logical router can belong to only one group. */
> @@ -674,6 +736,8 @@ ovn_datapath_destroy(struct hmap *datapaths, struct
ovn_datapath *od)
>  ovn_destroy_tnlids(>port_tnlids);
>  bitmap_free(od->ipam_info.allocated_ipv4s);
>  free(od->router_ports);
> +destroy_nat_entries(od);
> +free(od->nat_entries);
>  free(od->localnet_ports);
>  ovn_ls_port_group_destroy(>nb_pgs);
>  destroy_mcast_info_for_datapath(od);
> @@ -1102,6 +1166,7 @@ join_datapaths(struct northd_context *ctx, struct
hmap *datapaths,
>  ovs_list_push_back(nb_only, >list);
>  }
>  init_mcast_info_for_datapath(od);
> +init_nat_entries(od);
>  ovs_list_push_back(lr_list, >lr_list);
>  }
>  }
> @@ -8463,30 +8528,24 @@ build_lrouter_flows(struct hmap *datapaths,
struct hmap *ports,
>  snat_ips[n_snat_ips++] = snat_ip;
>  }
>
> -for (int i = 0; i < op->od->nbr->n_nat; i++) {
> -const struct nbrec_nat *nat;
> -
> -nat = op->od->nbr->nat[i];
> +for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
> +struct ovn_nat *nat_entry = >od->nat_entries[i];
> +const struct nbrec_nat *nat = nat_entry->nb;
>
> -ovs_be32 ip;
> -struct in6_addr ipv6;
> -bool is_v6 = false;
> -if (!ip_parse(nat->external_ip, ) || !ip) {
> -if (!ipv6_parse(nat->external_ip, )) {
> -static struct vlog_rate_limit rl =
> -VLOG_RATE_LIMIT_INIT(5, 1);
> -VLOG_WARN_RL(, "bad ip address %s in nat
configuration "
> - "for router %s", nat->external_ip,
op->key);
> -continue;
> -}
> -is_v6 = true;
> +/* Skip entries we failed to parse. */
> +if (!nat_entry_is_valid(nat_entry)) {
> +continue;
>  }
>
>   

Re: [ovs-dev] [PATCH v3 ovn 3/5] ovn-northd: Refactor ARP/NS responder in router pipeline.

2020-07-02 Thread Han Zhou
On Thu, Jul 2, 2020 at 7:53 AM Dumitru Ceara  wrote:
>
> Add functions to build the ARP/NS responder flows for table
> S_ROUTER_IN_IP_INPUT and use them in all places where responder
> flows are created.
>
> Signed-off-by: Dumitru Ceara 


Acked-by: Han Zhou 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 ovn 2/5] ovn-northd: Store ETH address of router inport in xreg0.

2020-07-02 Thread Han Zhou
On Thu, Jul 2, 2020 at 7:53 AM Dumitru Ceara  wrote:
>
> This helps simplifying logical flows that need to use the port's
> configured ETH address:
> - ARP responders for owned IPs
> - NS responders for owned IPs
>
> Signed-off-by: Dumitru Ceara 
> ---
>  northd/ovn-northd.8.xml |   22 ---
>  northd/ovn-northd.c |  148
++-
>  tests/ovn-northd.at |  140

>  3 files changed, 233 insertions(+), 77 deletions(-)
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index a7639f3..78e2a71 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -1487,7 +1487,9 @@ output;
>For each enabled router port P with Ethernet address
>E, a priority-50 flow that matches inport ==
>P  (eth.mcast || eth.dst ==
> -  E), with action next;.
> +  E), stores the router port ethernet address
> +  and advances to next table, with action
> +  xreg0[0..47]=E; next;.
>  
>
>  
> @@ -1507,7 +1509,7 @@ output;
>a priority-50 flow that matches inport == GW
> eth.dst == E, where GW
>is the logical router gateway port, with action
> -  next;.
> +  xreg0[0..47]=E; next;.
>  
>
>  
> @@ -1770,10 +1772,10 @@ next;
>
>  
>  eth.dst = eth.src;
> -eth.src = E;
> +eth.src = xreg0[0..47];
>  arp.op = 2; /* ARP reply. */
>  arp.tha = arp.sha;
> -arp.sha = E;
> +arp.sha = xreg0[0..47];
>  arp.tpa = arp.spa;
>  arp.spa = A;
>  outport = P;
> @@ -1822,10 +1824,10 @@ output;
>
>  
>  nd_na_router {
> -eth.src = E;
> +eth.src = xreg0[0..47];
>  ip6.src = A;
>  nd.target = A;
> -nd.tll = E;
> +nd.tll = xreg0[0..47];
>  outport = inport;
>  flags.loopback = 1;
>  output;
> @@ -1862,10 +1864,10 @@ nd_na_router {
>
>  
>  eth.dst = eth.src;
> -eth.src = E;
> +eth.src = xreg0[0..47];
>  arp.op = 2; /* ARP reply. */
>  arp.tha = arp.sha;
> -arp.sha = E;
> +arp.sha = xreg0[0..47];
>  arp.tpa = arp.spa;
>  arp.spa = A;
>  outport = P;
> @@ -1894,8 +1896,8 @@ output;
>  
>  eth.dst = eth.src;
>  nd_na {
> -eth.src = E;
> -nd.tll = E;
> +eth.src = xreg0[0..47];
> +nd.tll = xreg0[0..47];
>  ip6.src = A;
>  nd.target = A;
>  outport = P;
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 85d73ff..7c92436 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -223,6 +223,11 @@ enum ovn_stage {
>  #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]"
>  #define REGBIT_SKIP_LOOKUP_NEIGHBOR "reg9[3]"
>
> +/* Register to store the eth address associated to a router port for
packets
> + * received in S_ROUTER_IN_ADMISSION.
> + */
> +#define REG_INPORT_ETH_ADDR "xreg0[0..47]"
> +
>  /* Register for ECMP bucket selection. */
>  #define REG_ECMP_GROUP_ID   "reg8[0..15]"
>  #define REG_ECMP_MEMBER_ID  "reg8[16..31]"
> @@ -246,33 +251,40 @@ enum ovn_stage {
>   * +-+-+
>   *
>   * Logical Router pipeline:
> - * +-+--+---+-+
> - * | R0  | REGBIT_ND_RA_OPTS_RESULT |   | |
> - * | |IPv4-NEXT-HOP | X | |
> - * +-+--+ X | |
> - * | R1  | IPv4-SRC-IP for ARP-REQ  | R |IPv6 |
> - * +-+--+ E |  NEXT-HOP   |
> - * | R2  |UNUSED| G | |
> - * +-+--+ 0 | |
> - * | R3  |UNUSED|   | |
> - * +-+--+---+-+
> - * | R4  |UNUSED|   | |
> - * +-+--+ X | |
> - * | R5  |UNUSED| X | IPv6-SRC-IP |
> - * +-+--+ R |   for NS|
> - * | R6  |UNUSED| E | |
> - * +-+--+ G | |
> - * | R7  |UNUSED| 1 | |
> - * +-+--+---+-+
> - * | R8  | ECMP_GROUP_ID|
> - * | | ECMP_MEMBER_ID   |
> - * +-+--+
> - * | | REGBIT_{ |
> - * | |   EGRESS_LOOPBACK/   |
> - * | R9  |   PKT_LARGER/|
> - * | |   LOOKUP_NEIGHBOR_RESULT/|
> - * | |   SKIP_LOOKUP_NEIGHBOR}  |
> - * +-+--+
> + *
+-+--+---+-+---+-+
> + * | R0  | REGBIT_ND_RA_OPTS_RESULT | X | |   |
|
> + * | |IPv4-NEXT-HOP | R | |   |
|
> + * +-+--+ E | INPORT_ETH_ADDR | X |
|
> + * | R1  | IPv4-SRC-IP for ARP-REQ  | G |   (< IP_INPUT)  | X |IPv6
|
> + * | |  | 0 |

Re: [ovs-dev] [PATCH v3 ovn 5/5] ovn-northd: Minimize number of ARP/NS responder flows for DNAT.

2020-07-02 Thread Han Zhou
Thanks Dumitru. I have 2 comments below.

On Thu, Jul 2, 2020 at 7:53 AM Dumitru Ceara  wrote:
>
> Most ARP/NS responder flows can be configured per datapath instead of per
> router port.
>
> The only exception is with distributed gateway router ports which need
> special treatment. This patch changes the ARP/NS responder behavior and
adds:
> - Priority 92 flows to reply to ARP requests on distributed gateway router
>   ports, on the chassis where the DNAT entry is bound.
> - Priority 91 flows to drop ARP requests on distributed gateway router
ports,
>   on chassis where the DNAT entry is not bound.
> - Priority 90 flows to reply to ARP requests on all other router ports.
This
>   last type of flows is programmed exactly once per logical router
limiting
>   the total number of required logical flows.
>
> Suggested-by: Han Zhou 
> Reported-by: Girish Moodalbail 
> Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050186.html
> Signed-off-by: Dumitru Ceara 
> ---
>  northd/ovn-northd.8.xml |   16 +++-
>  northd/ovn-northd.c |  203
---
>  tests/ovn-northd.at |   65 +--
>  tests/ovn.at|8 +-
>  4 files changed, 190 insertions(+), 102 deletions(-)
>
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 84224ff..11607c0 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -1857,9 +1857,8 @@ nd_na_router {
>IPv4: For a configured DNAT IP address or a load balancer
>IPv4 VIP A, for each router port P with
>Ethernet address E, a priority-90 flow matches
> -  inport == P  arp.op == 1 
> -  arp.tpa == A (ARP request)
> -  with the following actions:
> +  arp.op == 1  arp.tpa == A
> +  (ARP request) with the following actions:
>  
>
>  
> @@ -1876,6 +1875,11 @@ output;
>  
>
>  
> +  IPv4: For a configured load balancer IPv4 VIP, a similar flow
is
> +  added with the additional match inport ==
P.
> +
> +
> +
>If the router port P is a distributed gateway router
>port, then the is_chassis_resident(P)
is
>also added in the match condition for the load balancer IPv4
> @@ -1922,9 +1926,11 @@ nd_na {
>  
>
>  If the corresponding NAT rule cannot be handled in a
> -distributed manner, then this flow is only programmed on
> +distributed manner, then a priority-92 flow is programmed on
>  the gateway port instance on the
> -redirect-chassis.  This behavior avoids
> +redirect-chassis.  A priority-91 drop flow is
> +programmed on the other chassis when ARP requests/NS packets
> +are received on the gateway port. This behavior avoids
>  generation of multiple ARP responses from different chassis,
>  and allows upstream MAC learning to point to the
>  redirect-chassis.
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index a1ba56f..10fc8cf 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -8048,7 +8048,7 @@ lrouter_nat_is_stateless(const struct nbrec_nat
*nat)
>  static void
>  build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
> const char *ip_address, const char *eth_addr,
> -   struct ds *extra_match, uint16_t priority,
> +   struct ds *extra_match, bool drop, uint16_t
priority,
> struct hmap *lflows, const struct ovsdb_idl_row
*hint)
>  {
>  struct ds match = DS_EMPTY_INITIALIZER;
> @@ -8063,20 +8063,24 @@ build_lrouter_arp_flow(struct ovn_datapath *od,
struct ovn_port *op,
>  if (extra_match && ds_last(extra_match) != EOF) {
>  ds_put_format(, " && %s", ds_cstr(extra_match));
>  }
> -ds_put_format(,
> -  "eth.dst = eth.src; "
> -  "eth.src = %s; "
> -  "arp.op = 2; /* ARP reply */ "
> -  "arp.tha = arp.sha; "
> -  "arp.sha = %s; "
> -  "arp.tpa = arp.spa; "
> -  "arp.spa = %s; "
> -  "outport = inport; "
> -  "flags.loopback = 1; "
> -  "output;",
> -  eth_addr,
> -  eth_addr,
> -  ip_address);
> +if (drop) {
> +ds_put_format(, "drop;");
> +} else {
> +ds_put_format(,
> +  "eth.dst = eth.src; "
> +  "eth.src = %s; "
> +  "arp.op = 2; /* ARP reply */ "
> +  "arp.tha = arp.sha; "
> +  "arp.sha = %s; "
> +  "arp.tpa = arp.spa; "
> +  "arp.spa = %s; "
> +  "outport = inport; "
> +  "flags.loopback = 1; "

[ovs-dev] [Engineer] Engineer Jobs T-Shirts Catalogue New Design 2020

2020-07-02 Thread phamvantai896
*Shop Funny Engineer T-Shirts Online*



















































*Hello Sir, I'm A Student And I'm Honored To Be Sell Tee Shirts For You*

I know your very love Engineer T-Shirt and want have them. I would be
honored to sell Tee Shirts for you. Help me have many orders. I Wish You
All The Best Success And Happiness.


*Link Category : Engineer Tee Shirts Full Qoutes Store
 *
* Click Here To Unsubscribe
*
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 ovn] Throttle the OVS-OVN Global config logs

2020-07-02 Thread 0-day Robot
Bleep bloop.  Greetings Ankur Sharma, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Ankur Sharma  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Ankur Sharma
Lines checked: 52, Warnings: 1, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1 ovn] Throttle the OVS-OVN Global config logs

2020-07-02 Thread 0-day Robot
Bleep bloop.  Greetings Ankur Sharma, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Author Ankur Sharma  needs to sign off.
WARNING: Unexpected sign-offs from developers who are not authors or co-authors 
or committers: Ankur Sharma
Lines checked: 54, Warnings: 1, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] system-dpdk: add negotiation check for userspace-tso

2020-07-02 Thread Flavio Leitner


Hi Gowrishankar,

Thanks for working on this patch, it's a good idea to test the
TSO negotiation.

Which dpdk version are you testing? Because I am getting the same
results on 19.11.3 or master. See the details below.

Sometimes it fails with this in testpmd-dpdkvhostuser0.log:
 EAL: Detected 4 lcore(s)
 EAL: Detected 1 NUMA nodes
 EAL: Cannot create lock on '/var/run/dpdk/page0/config'. Is another
 primary process running?
 EAL: FATAL: Cannot init config
 EAL: Cannot init config
 EAL: Error - exiting with code: 1
   Cause: Cannot init EAL: Success

Because the previous testpmd hasn't terminated fully yet.
I added this hack and it helped:
@@ -302,6 +302,7 @@ AT_CHECK([ovs-vsctl del-port br10 dpdkvhostuserclient0], 
[], [stdout], [stderr])
 
 dnl Clean up the testpmd now
 pkill -f -x -9 'tail -f /dev/null'
+sleep 1
 
 dnl Add vhostuser port (server mode)
 AT_CHECK([ovs-vsctl add-port br10 dpdkvhostuser0 -- set Interface 
dpdkvhostuser0 \


It also fails regularly checking for TCP_TSO flag in testpmd output.
I did a copy of the log right before checking and compared with the
log at the end of the test. Turns out that the log is not updated yet.

So, I improved the hack above to terminate testpmd earlier and give
extra time in both places:

@@ -287,6 +287,9 @@ tail -f /dev/null | testpmd --socket-mem="$(cat NUMA_NODE)" 
--no-pci\
 
 dnl Give settling time to the testpmd processes - NOTE: this is bad form.
 sleep 10
+dnl Clean up the testpmd now
+pkill -f -x -9 'tail -f /dev/null'
+sleep 1
 
 dnl Check whether TSO is turned on (host side)
 AT_CHECK([awk '/negotiated Virtio features/ {a=$NF} END{print a}' \
@@ -300,9 +303,6 @@ AT_CHECK([awk 'BEGIN{n=0} /Per Port/ && /(TCP|UDP)_CKSUM/ 
&& /TCP_TSO/ {n++} END
 dnl Clean up
 AT_CHECK([ovs-vsctl del-port br10 dpdkvhostuserclient0], [], [stdout], 
[stderr])
 
-dnl Clean up the testpmd now
-pkill -f -x -9 'tail -f /dev/null'
-
 dnl Add vhostuser port (server mode)
 AT_CHECK([ovs-vsctl add-port br10 dpdkvhostuser0 -- set Interface 
dpdkvhostuser0 \
   type=dpdkvhostuser], [],


Same change for the next chunk below that. Perhaps it can wait until
the /var/run/dpdk/page0 is gone.

Then it fails when testing vhost in server mode because OVS doesn't
pass the EXTBUF flag, so TSO gets disabled:
@@ -1349,6 +1349,12 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
 
 /* There is no support for multi-segments buffers. */
 dev->vhost_driver_flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
+
+/* Enable External Buffers if TCP Segmentation Offload is enabled. */
+if (userspace_tso_enabled()) {
+dev->vhost_driver_flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
+}
+
 err = rte_vhost_driver_register(dev->vhost_id, dev->vhost_driver_flags);
 if (err) {
 VLOG_ERR("vhost-user socket device setup failure for socket %s\n",

I am going to post that as a patch tomorrow.

Now it always runs until vswitchd is stopped, but then it fails because
vswitchd reports a NUMA socket error:

2020-07-03T00:11:35.575Z|00019|dpdk|WARN|EAL:   Invalid NUMA socket, default to 0
+2020-07-03T00:11:35.575Z|00021|dpdk|WARN|EAL:   Invalid NUMA socket, default 
to 0
+2020-07-03T00:11:35.652Z|00054|dpdk|WARN|VHOST_CONFIG: failed to connect to 
/home/fleitner/repo/ovs/tests/system-dpdk-testsuite.dir/6/dpdkvhostclient0: No 
such file or directory
+2020-07-03T00:11:45.764Z|00044|dpdk|WARN|VHOST_CONFIG: failed to connect to 
/home/fleitner/repo/ovs/tests/system-dpdk-testsuite.dir/6/dpdkvhostclient0: No 
such file or directory
+2020-07-03T00:11:46.801Z|00065|netdev_dpdk|WARN|dpdkvhostuser ports are 
considered deprecated;  please migrate to dpdkvhostuserclient ports.
+2020-07-03T00:11:35Z|00019|dpdk|WARN|EAL:   Invalid NUMA socket, default to 0
+2020-07-03T00:11:35Z|00021|dpdk|WARN|EAL:   Invalid NUMA socket, default to 0

My system is a virtual machine running fedora 32 with a virtio NIC
for testing. It has selinux disabled, upstream kernel cd77006e01b
recompiled with fedora's kernel config + CONFIG_VFIO_NOIOMMU=y to
load vfio kernel module passing enable_unsafe_noiommu_mode=1.

fbl

On Wed, Jun 24, 2020 at 08:10:58PM +0530, Gowrishankar Muthukrishnan wrote:
> This patch adds minimal check for userspace-tso in system-dpdk
> tests, starting with verification on virtio negotiation.
> 
> Signed-off-by: Gowrishankar Muthukrishnan 
> ---
> v4:
>  - configure offload settings in testpmd
> ---
>  tests/system-dpdk-macros.at |  17 +++-
>  tests/system-dpdk.at| 228 
> +++-
>  2 files changed, 236 insertions(+), 9 deletions(-)
> 
> diff --git a/tests/system-dpdk-macros.at b/tests/system-dpdk-macros.at
> index c6708ca..9e55f10 100644
> --- a/tests/system-dpdk-macros.at
> +++ b/tests/system-dpdk-macros.at
> @@ -33,13 +33,11 @@ m4_define([OVS_DPDK_PRE_PHY_SKIP],
>  ])
>  
>  
> -# OVS_DPDK_START()
> +# OVS_DB_START()
>  #
> -# Create an empty database and start ovsdb-server. Add special configuration
> -# dpdk-init to enable DPDK 

[ovs-dev] [PATCH v2 ovn] Throttle the OVS-OVN Global config logs

2020-07-02 Thread Ankur Sharma
From: Ankur Sharma 

ISSUE:
We observed that if ovn-controller is running, while ovn-encap-ip/
ovn-encap-type is not set, then following error gets logged
continously:
"chassis|INFO|Need to specify an encap type and ip"

Above log increased the size of ovn-controller.log to hundreds
of GBs  before logger.d could kick in (frequency is 24 hours)
and rotate the log file.

This Patch rate limits (rate_limit=1, burst_size=5) INFO logs
for following errors:
a. ovn-encap-ip/ovn-encap-type not set.
b. ovn-encap-type set to invalid value.

Signed-off-by: Ankur Sharma
---
 controller/chassis.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index aa3fed0..eec270e 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -214,7 +214,8 @@ chassis_parse_ovs_encap_type(const char *encap_type,
 
 SSET_FOR_EACH (type, encap_type_set) {
 if (!get_tunnel_type(type)) {
-VLOG_INFO("Unknown tunnel type: %s", type);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+VLOG_INFO_RL(, "Unknown tunnel type: %s", type);
 }
 }
 
@@ -266,7 +267,8 @@ chassis_parse_ovs_config(const struct 
ovsrec_open_vswitch_table *ovs_table,
 const char *encap_type = smap_get(>external_ids, "ovn-encap-type");
 const char *encap_ips = smap_get(>external_ids, "ovn-encap-ip");
 if (!encap_type || !encap_ips) {
-VLOG_INFO("Need to specify an encap type and ip");
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+VLOG_INFO_RL(, "Need to specify an encap type and ip");
 return false;
 }
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 ovn] Throttle the OVS-OVN Global config logs

2020-07-02 Thread Ankur Sharma
From: Ankur Sharma 

From: Ankur Sharma 

ISSUE:
We observed that if ovn-controller is running, while ovn-encap-ip/
ovn-encap-type is not set, then following error gets logged
continously:
"chassis|INFO|Need to specify an encap type and ip"

Above log increased the size of ovn-controller.log to hundreds
of GBs  before logger.d could kick in (frequency is 24 hours)
and rotate the log file.

This Patch rate limits (rate_limit=1, burst_size=5) INFO logs
for following errors:
a. ovn-encap-ip/ovn-encap-type not set.
b. ovn-encap-type set to invalid value.

Signed-off-by: Ankur Sharma
---
 controller/chassis.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index aa3fed0..eec270e 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -214,7 +214,8 @@ chassis_parse_ovs_encap_type(const char *encap_type,
 
 SSET_FOR_EACH (type, encap_type_set) {
 if (!get_tunnel_type(type)) {
-VLOG_INFO("Unknown tunnel type: %s", type);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+VLOG_INFO_RL(, "Unknown tunnel type: %s", type);
 }
 }
 
@@ -266,7 +267,8 @@ chassis_parse_ovs_config(const struct 
ovsrec_open_vswitch_table *ovs_table,
 const char *encap_type = smap_get(>external_ids, "ovn-encap-type");
 const char *encap_ips = smap_get(>external_ids, "ovn-encap-ip");
 if (!encap_type || !encap_ips) {
-VLOG_INFO("Need to specify an encap type and ip");
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+VLOG_INFO_RL(, "Need to specify an encap type and ip");
 return false;
 }
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Mediación de conflictos y prevención de demandas

2020-07-02 Thread Acciones preventivas y de negociación
Nuestro webinar interactivo le brindará herramientas que le ayudarán para 
mejorar los procesos en la administración del recurso humano 
mediante acciones preventivas y de negociación de conflictos para evitar el 
litigio laboral. El participante dispondrá, con mayor certeza, de
 información para ejecutar los procesos de Recursos Humanos considerando los 
aspectos legales y laborales y procurando un mejor ambiente
 laboral y sin conflictos.
 
Mediación de conflictos y prevención de demandas laborales.
¡Cupo limitado! 

Fecha: Míercoles 08 de Julio del 2020
Horario: 10:00 am a 14:00 pm
Formato: En Línea con interacción en vivo
Instructor: Gerardo Vázquez cuenta con experiencia de 22 años en la gestión de 
Recursos Humanos con lineamientos
 operativos y corporativos en empresas productoras y comerciales. 

Ejes Temáticos:

- Contratación del personal.
- Sistema disciplinario.
- Obligaciones del patron y trabajadores.
- Seguridad y Salud en el Trabajo.
- Gestion laboral Sindical.
- Normatividad Laboral.
- Terminacion de la relacion laboral.
- Derecho laboral procesal.

Solicita más información respondiendo a este correo con la palabra  Demandas + 
los siguientes datos:

Nombre:
Teléfono:
Empresa:
Correo Alterno:

Números de Atención:
55 15 54 66 30 - 55 30 16 70 85 


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev-unixctl.man: document bond-show command

2020-07-02 Thread Flavio Leitner
On Fri, Jun 26, 2020 at 03:56:31PM +0200, Adrian Moreno wrote:
> Document recently added ovs-appctl command

Could you add a Fixes tag to indicate the commit?
Thanks
fbl

> 
> Cc: Vishal Deep Ajmera 
> Signed-off-by: Adrian Moreno 
> ---
>  lib/dpif-netdev-unixctl.man | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> index 6c54f6f9c..d1cf7c669 100644
> --- a/lib/dpif-netdev-unixctl.man
> +++ b/lib/dpif-netdev-unixctl.man
> @@ -217,3 +217,12 @@ with port names, which this thread polls.
>  .
>  .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
>  Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
> +.
> +.IP "\fBdpif-netdev/bond-show\fR [\fIdp\fR]"
> +When "other_config:lb-output-action" is set to "true", the userspace datapath
> +handles the load balancing of bonds directly instead of depending on flow 
> +recirculation (only in balance-tcp mode).
> +
> +When this is the case, the above command prints the load-balancing 
> information
> +of the bonds configured in datapath \fIdp\fR showing the slave associated 
> with
> +each bucket (hash).
> -- 
> 2.26.2
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

-- 
fbl
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v9] AB bonding: Add "primary" interface concept

2020-07-02 Thread Flavio Leitner


Another thing, consider a small note in NEWS.

On Thu, Jul 02, 2020 at 05:17:59PM -0300, Flavio Leitner wrote:
> 
> Hi Jeff,
> 
> Thanks for following up.
> I have a couple comments inline.
> 
> 
> On Thu, Jun 25, 2020 at 02:03:34PM -0700, Jeff Squyres via dev wrote:
> > In AB bonding, if the current active slave becomes disabled, a
> > replacement slave is arbitrarily picked from the remaining set of
> > enabled slaves.  This commit adds the concept of a "primary" slave: an
> > interface that will always be (or become) the current active slave if
> > it is enabled.
> > 
> > The rationale for this functionality is to allow the designation of a
> > preferred interface for a given bond.  For example:
> > 
> > 1. Bond is created with interfaces p1 (primary) and p2, both enabled.
> > 2. p1 becomes the current active slave (because it was designated as
> >the primary).
> > 3. Later, p1 fails/becomes disabled.
> > 4. p2 is chosen to become the current active slave.
> > 5. Later, p1 becomes re-enabled.
> > 6. p1 is chosen to become the current active slave (because it was
> >designated as the primary)
> > 
> > Note that p1 becomes the active slave once it becomes re-enabled, even
> > if nothing has happened to p2.
> > 
> > This "primary" concept exists in Linux kernel network interface
> > bonding, but did not previously exist in OVS bonding.
> > 
> > Only one primary slave inteface is supported per bond, and is only
> > supported for active/backup bonding.
> > 
> > The primary slave interface is designated via
> > "other_config:bond-primary" when creating a bond.
> > 
> > Also, while adding tests for the "primary" concept, make a few small
> > improvements to the non-primary AB bonding test.
> > 
> > Signed-off-by: Jeff Squyres 
> > Reviewed-by: Aaron Conole 
> > Tested-by: Greg Rose 
> > Acked-by: Greg Rose 
> > ---
> > v9:
> > - Removed stale code that should have been removed in v8.
> > - Fixed comment punctuation.
> > 
> > v8:
> > - Many style and simplification suggestions from Ilya Maximets
> > - Rename "primary:" to "active-backup primary:" in ovs-appctl
> >   bond/show output, and make it always appear (vs. only appearing in
> >   explicit AB bonding scenarios).
> > - Properly handle fallback-to-AB-bonding cases
> > - Use time/warp and OVS_WAIT_UNTIL in the bonding tests to prevent
> >   races, and a few other improvements to the test quality
> > - After rebasing patch to head of tree, adjust test output as result
> >   of changes from other commits.
> > 
> > v7:
> > - After rebasing patch to head of tree, adjust test output as result
> >   of changes from 81f71381f.
> > 
> > v6:
> > - Style: bleep bloop.  Fix use of {}.
> > 
> > v5:
> > - Handle when bond is reconfigured to remove "bond-primary" config.
> > - Fix potential flapping between remaining slaves.
> > - Add a test to re-add a removed primary interface and make sure the
> >   bond reflects that properly.
> > - Add a test to remove "bond-primary" config and make sure the bond
> >   reflects that properly.
> > 
> > v4:
> > - Style: bleep bloop.  Trim line length below 79 characters.
> > 
> > v3:
> > - Adjusted print logic for when the primary slave is not attached
> > 
> > v2:
> > - Added "ovs-vsctl bond/show" label when primary interface is no
> >   longer enslaved
> > - Fixed strcmp() usage nits
> > - Moved "other_config:bond-primary" docs to the "Bonding
> >   Configuration" group
> > - Expanded commit message
> > - Added more test cases (including one for when the primary interface
> >   is no longer enslaved)
> > 
> >  ofproto/bond.c|  54 --
> >  ofproto/bond.h|   1 +
> >  tests/lacp.at |   9 ++
> >  tests/ofproto-dpif.at | 224 +++---
> >  vswitchd/bridge.c |   5 +
> >  vswitchd/vswitch.xml  |   8 ++
> >  6 files changed, 280 insertions(+), 21 deletions(-)
> > 
> > diff --git a/ofproto/bond.c b/ofproto/bond.c
> > index 9947e7531..58ee67beb 100644
> > --- a/ofproto/bond.c
> > +++ b/ofproto/bond.c
> > @@ -89,6 +89,7 @@ struct bond_slave {
> >  /* Link status. */
> >  bool enabled;   /* May be chosen for flows? */
> >  bool may_enable;/* Client considers this slave bondable. */
> > +bool is_primary;/* This slave is preferred over others. */
> >  long long delay_expires;/* Time after which 'enabled' may change. 
> > */
> >  
> >  /* Rebalancing info.  Used only by bond_rebalance(). */
> > @@ -124,6 +125,7 @@ struct bond {
> >  uint32_t basis; /* Basis for flow hash function. */
> >  bool use_lb_output_action;  /* Use lb_output action to avoid 
> > recirculation.
> > Applicable only for Balance TCP mode. */
> > +char *primary;  /* Name of the primary slave interface. */
> >  
> >  /* SLB specific bonding info. */
> >  struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
> > @@ -241,6 +243,7 @@ 

Re: [ovs-dev] [PATCH v2] odp-execute: Fix length checking while executing check_pkt_len action.

2020-07-02 Thread Flavio Leitner
On Fri, Jun 26, 2020 at 12:51:10PM +0200, Ilya Maximets wrote:
> If dp-packet contains l2 padding or cutlen was applied to it, size will
> be larger than the actual size of a payload and action will work
> incorrectly.
> 
> Ex. Padding could be added during miniflow_extract() if detected.
> 
> CC: Numan Siddique 
> Fixes: 5b34f8fc3b38 ("Add a new OVS action check_pkt_larger")
> Reported-by: Miroslav Kubiczek 
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-discuss/2020-May/050157.html
> Signed-off-by: Ilya Maximets 
> ---

Acked-by: Flavio Leitner 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v9] AB bonding: Add "primary" interface concept

2020-07-02 Thread Flavio Leitner


Hi Jeff,

Thanks for following up.
I have a couple comments inline.


On Thu, Jun 25, 2020 at 02:03:34PM -0700, Jeff Squyres via dev wrote:
> In AB bonding, if the current active slave becomes disabled, a
> replacement slave is arbitrarily picked from the remaining set of
> enabled slaves.  This commit adds the concept of a "primary" slave: an
> interface that will always be (or become) the current active slave if
> it is enabled.
> 
> The rationale for this functionality is to allow the designation of a
> preferred interface for a given bond.  For example:
> 
> 1. Bond is created with interfaces p1 (primary) and p2, both enabled.
> 2. p1 becomes the current active slave (because it was designated as
>the primary).
> 3. Later, p1 fails/becomes disabled.
> 4. p2 is chosen to become the current active slave.
> 5. Later, p1 becomes re-enabled.
> 6. p1 is chosen to become the current active slave (because it was
>designated as the primary)
> 
> Note that p1 becomes the active slave once it becomes re-enabled, even
> if nothing has happened to p2.
> 
> This "primary" concept exists in Linux kernel network interface
> bonding, but did not previously exist in OVS bonding.
> 
> Only one primary slave inteface is supported per bond, and is only
> supported for active/backup bonding.
> 
> The primary slave interface is designated via
> "other_config:bond-primary" when creating a bond.
> 
> Also, while adding tests for the "primary" concept, make a few small
> improvements to the non-primary AB bonding test.
> 
> Signed-off-by: Jeff Squyres 
> Reviewed-by: Aaron Conole 
> Tested-by: Greg Rose 
> Acked-by: Greg Rose 
> ---
> v9:
> - Removed stale code that should have been removed in v8.
> - Fixed comment punctuation.
> 
> v8:
> - Many style and simplification suggestions from Ilya Maximets
> - Rename "primary:" to "active-backup primary:" in ovs-appctl
>   bond/show output, and make it always appear (vs. only appearing in
>   explicit AB bonding scenarios).
> - Properly handle fallback-to-AB-bonding cases
> - Use time/warp and OVS_WAIT_UNTIL in the bonding tests to prevent
>   races, and a few other improvements to the test quality
> - After rebasing patch to head of tree, adjust test output as result
>   of changes from other commits.
> 
> v7:
> - After rebasing patch to head of tree, adjust test output as result
>   of changes from 81f71381f.
> 
> v6:
> - Style: bleep bloop.  Fix use of {}.
> 
> v5:
> - Handle when bond is reconfigured to remove "bond-primary" config.
> - Fix potential flapping between remaining slaves.
> - Add a test to re-add a removed primary interface and make sure the
>   bond reflects that properly.
> - Add a test to remove "bond-primary" config and make sure the bond
>   reflects that properly.
> 
> v4:
> - Style: bleep bloop.  Trim line length below 79 characters.
> 
> v3:
> - Adjusted print logic for when the primary slave is not attached
> 
> v2:
> - Added "ovs-vsctl bond/show" label when primary interface is no
>   longer enslaved
> - Fixed strcmp() usage nits
> - Moved "other_config:bond-primary" docs to the "Bonding
>   Configuration" group
> - Expanded commit message
> - Added more test cases (including one for when the primary interface
>   is no longer enslaved)
> 
>  ofproto/bond.c|  54 --
>  ofproto/bond.h|   1 +
>  tests/lacp.at |   9 ++
>  tests/ofproto-dpif.at | 224 +++---
>  vswitchd/bridge.c |   5 +
>  vswitchd/vswitch.xml  |   8 ++
>  6 files changed, 280 insertions(+), 21 deletions(-)
> 
> diff --git a/ofproto/bond.c b/ofproto/bond.c
> index 9947e7531..58ee67beb 100644
> --- a/ofproto/bond.c
> +++ b/ofproto/bond.c
> @@ -89,6 +89,7 @@ struct bond_slave {
>  /* Link status. */
>  bool enabled;   /* May be chosen for flows? */
>  bool may_enable;/* Client considers this slave bondable. */
> +bool is_primary;/* This slave is preferred over others. */
>  long long delay_expires;/* Time after which 'enabled' may change. */
>  
>  /* Rebalancing info.  Used only by bond_rebalance(). */
> @@ -124,6 +125,7 @@ struct bond {
>  uint32_t basis; /* Basis for flow hash function. */
>  bool use_lb_output_action;  /* Use lb_output action to avoid 
> recirculation.
> Applicable only for Balance TCP mode. */
> +char *primary;  /* Name of the primary slave interface. */
>  
>  /* SLB specific bonding info. */
>  struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
> @@ -241,6 +243,7 @@ bond_create(const struct bond_settings *s, struct 
> ofproto_dpif *ofproto)
>  
>  bond->active_slave_mac = eth_addr_zero;
>  bond->active_slave_changed = false;
> +bond->primary = NULL;
>  
>  bond_reconfigure(bond, s);
>  return bond;
> @@ -294,6 +297,7 @@ bond_unref(struct bond *bond)
>  update_recirc_rules__(bond);
>  
>  

[ovs-dev] お支払い方法の情報を更新

2020-07-02 Thread Amazon
お支払い方法の情報を更新してください。Update default card for your membership.

 
 マイストア? タイムセール? ギフト券 

 


Amazonプライムをご利用頂きありがとうございます。お客様のAmazonプライム会員資格は、2020/06/09に更新を迎えます。お調べしたところ、会費のお支払いに使用できる有効なクレジットカードがアカウントに登録されていません。クレジットカード情報の更新、新しいクレジットカードの追加については以下の手順をご確認ください。



1. アカウントサービスからAmazonプライム会員情報を管理するにアクセスします。


2. Amazonプライムに登録したAmazon.co.jpのアカウントを使用してサインインします。


3. 左側に表示されている「現在の支払方法」の下にある「支払方法を変更する」のリンクをクリックします。


4. 有効期限の更新または新しいクレジットカード情報を入力してください。



Amazonプライムを継続してご利用いただくために、会費のお支払いにご指定いただいたクレジットカードが使用できない場合は、アカウントに登録されている別 
のクレジットカードに会費を請求させて頂きます。会費の請求が出来ない場合は、お客様のAmazonプライム会員資格は失効し、特典をご利用できなくなります。


Amazon.co.jpカスタマーサービス 




 
支払方法の情報を更新する 



 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v9] AB bonding: Add "primary" interface concept

2020-07-02 Thread Jeff Squyres (jsquyres) via dev
Gentle nudge.  Any further feedback on this one?

Thank you!


> On Jun 25, 2020, at 5:03 PM, Jeff Squyres  wrote:
> 
> In AB bonding, if the current active slave becomes disabled, a
> replacement slave is arbitrarily picked from the remaining set of
> enabled slaves.  This commit adds the concept of a "primary" slave: an
> interface that will always be (or become) the current active slave if
> it is enabled.
> 
> The rationale for this functionality is to allow the designation of a
> preferred interface for a given bond.  For example:
> 
> 1. Bond is created with interfaces p1 (primary) and p2, both enabled.
> 2. p1 becomes the current active slave (because it was designated as
>   the primary).
> 3. Later, p1 fails/becomes disabled.
> 4. p2 is chosen to become the current active slave.
> 5. Later, p1 becomes re-enabled.
> 6. p1 is chosen to become the current active slave (because it was
>   designated as the primary)
> 
> Note that p1 becomes the active slave once it becomes re-enabled, even
> if nothing has happened to p2.
> 
> This "primary" concept exists in Linux kernel network interface
> bonding, but did not previously exist in OVS bonding.
> 
> Only one primary slave inteface is supported per bond, and is only
> supported for active/backup bonding.
> 
> The primary slave interface is designated via
> "other_config:bond-primary" when creating a bond.
> 
> Also, while adding tests for the "primary" concept, make a few small
> improvements to the non-primary AB bonding test.
> 
> Signed-off-by: Jeff Squyres 
> Reviewed-by: Aaron Conole 
> Tested-by: Greg Rose 
> Acked-by: Greg Rose 
> ---
> v9:
> - Removed stale code that should have been removed in v8.
> - Fixed comment punctuation.
> 
> v8:
> - Many style and simplification suggestions from Ilya Maximets
> - Rename "primary:" to "active-backup primary:" in ovs-appctl
>  bond/show output, and make it always appear (vs. only appearing in
>  explicit AB bonding scenarios).
> - Properly handle fallback-to-AB-bonding cases
> - Use time/warp and OVS_WAIT_UNTIL in the bonding tests to prevent
>  races, and a few other improvements to the test quality
> - After rebasing patch to head of tree, adjust test output as result
>  of changes from other commits.
> 
> v7:
> - After rebasing patch to head of tree, adjust test output as result
>  of changes from 81f71381f.
> 
> v6:
> - Style: bleep bloop.  Fix use of {}.
> 
> v5:
> - Handle when bond is reconfigured to remove "bond-primary" config.
> - Fix potential flapping between remaining slaves.
> - Add a test to re-add a removed primary interface and make sure the
>  bond reflects that properly.
> - Add a test to remove "bond-primary" config and make sure the bond
>  reflects that properly.
> 
> v4:
> - Style: bleep bloop.  Trim line length below 79 characters.
> 
> v3:
> - Adjusted print logic for when the primary slave is not attached
> 
> v2:
> - Added "ovs-vsctl bond/show" label when primary interface is no
>  longer enslaved
> - Fixed strcmp() usage nits
> - Moved "other_config:bond-primary" docs to the "Bonding
>  Configuration" group
> - Expanded commit message
> - Added more test cases (including one for when the primary interface
>  is no longer enslaved)
> 
> ofproto/bond.c|  54 --
> ofproto/bond.h|   1 +
> tests/lacp.at |   9 ++
> tests/ofproto-dpif.at | 224 +++---
> vswitchd/bridge.c |   5 +
> vswitchd/vswitch.xml  |   8 ++
> 6 files changed, 280 insertions(+), 21 deletions(-)
> 
> diff --git a/ofproto/bond.c b/ofproto/bond.c
> index 9947e7531..58ee67beb 100644
> --- a/ofproto/bond.c
> +++ b/ofproto/bond.c
> @@ -89,6 +89,7 @@ struct bond_slave {
> /* Link status. */
> bool enabled;   /* May be chosen for flows? */
> bool may_enable;/* Client considers this slave bondable. */
> +bool is_primary;/* This slave is preferred over others. */
> long long delay_expires;/* Time after which 'enabled' may change. */
> 
> /* Rebalancing info.  Used only by bond_rebalance(). */
> @@ -124,6 +125,7 @@ struct bond {
> uint32_t basis; /* Basis for flow hash function. */
> bool use_lb_output_action;  /* Use lb_output action to avoid 
> recirculation.
>Applicable only for Balance TCP mode. */
> +char *primary;  /* Name of the primary slave interface. */
> 
> /* SLB specific bonding info. */
> struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
> @@ -241,6 +243,7 @@ bond_create(const struct bond_settings *s, struct 
> ofproto_dpif *ofproto)
> 
> bond->active_slave_mac = eth_addr_zero;
> bond->active_slave_changed = false;
> +bond->primary = NULL;
> 
> bond_reconfigure(bond, s);
> return bond;
> @@ -294,6 +297,7 @@ bond_unref(struct bond *bond)
> update_recirc_rules__(bond);
> 
> hmap_destroy(>pr_rule_ops);
> +free(bond->primary);
> free(bond->name);
>  

Re: [ovs-dev] [PATCH ovn v2] ovn-controller: Configure hwaddr for the integration bridge

2020-07-02 Thread Mark Michelson

Excellent! Thanks, Numan!

Acked-by: Mark Michelson 

On 7/1/20 11:34 AM, num...@ovn.org wrote:

From: Numan Siddique 

When a first non-local port is added to the integration bridge, it results
in the recalculation of datapath-id by ovs-vswitchd forcing all
active connections to the controllers to reconnect.

This patch avoids these reconnections between ovs-vswitchd and ovn-controller
by setting the hwaddr for the integration bridge when ovn-controller creates the
integration bridge. ovs-vswitchd uses the bridge:hwaddr if set to generate the
datapath-id.

Signed-off-by: Numan Siddique 
---
v1 -> v2
-
   * Added the comments in the code as suggested by Mark.

  controller/ovn-controller.c | 27 +--
  1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index b0ee60a1a..e355007b8 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -277,10 +277,33 @@ create_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
  bridge = ovsrec_bridge_insert(ovs_idl_txn);
  ovsrec_bridge_set_name(bridge, bridge_name);
  ovsrec_bridge_set_fail_mode(bridge, "secure");
-const struct smap oc = SMAP_CONST1(, "disable-in-band", "true");
-ovsrec_bridge_set_other_config(bridge, );
  ovsrec_bridge_set_ports(bridge, , 1);
  
+struct smap oc = SMAP_INITIALIZER();

+smap_add(, "disable-in-band", "true");
+
+/* When a first non-local port is added to the integration bridge, it
+ * results in the recalculation of datapath-id by ovs-vswitchd forcing all
+ * active connections to the controllers to reconnect.
+ *
+ * We can avoid the disconnection by setting the 'other_config:hwaddr' for
+ * the integration bridge. ovs-vswitchd uses this hwaddr to calculate the
+ * datapath-id and it doesn't recalculate the datapath-id later when the
+ * first non-local port is added.
+ *
+ * So generate a random mac and set the 'hwaddr' option in the
+ * other_config.
+ * */
+struct eth_addr br_hwaddr;
+eth_addr_random(_hwaddr);
+char ea_s[ETH_ADDR_STRLEN + 1];
+snprintf(ea_s, sizeof ea_s, ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(br_hwaddr));
+smap_add(, "hwaddr", ea_s);
+
+ovsrec_bridge_set_other_config(bridge, );
+smap_destroy();
+
  struct ovsrec_bridge **bridges;
  size_t bytes = sizeof *bridges * cfg->n_bridges;
  bridges = xmalloc(bytes + sizeof *bridges);



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] OVS Soft Freeze date for 2.14 release

2020-07-02 Thread Ilya Maximets
Hi Ben,

We reached the "soft freeze" date yesterday. So, we probably need
to make announcement on a mailing list that soft freeze is started.

And we need to announce a "feature freeze" (branching) targeted date.
I think it might be good to announce today or tomorrow and set the
branch creation date to July 17th to round up the week as we did for
the last release. 

What do you think?  Will you take care of this?

Alternatively, If you don't have much time, I could take care of any
part of the release process.  Or all of it.

Regarding exceptions.  For the patches that, I think, should be in
current release, but missed the "soft freeze" date: we're waiting
for patches to recommend new DPDK stable releases to use with OVS.
I guess, these patches should be sent soon so we could accept them
before branching.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] OVN Meeting Logs 02 July, 2020

2020-07-02 Thread mmichels
Here is the IRC log for the OVN meeting for 02 July, 2020

http://eavesdrop.openstack.org/meetings//ovn_community_development_discussion/2020/ovn_community_development_discussion.2020-07-02-17.19.log.html

If you are interested in attending this meeting, it happens every
Thursday in the #openvswitch channel on the Freenode IRC server. The
next one is at 13:15 EDT (18:15 BST, 10:15 PDT) on 09 July, 2020.

Mark Michelson

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 0/6] DPCLS Subtable ISA Optimization

2020-07-02 Thread Harry van Haaren
v6 work done:
- Fix as --64 unrecognized option
- Fix build issues with avx512 library changes
- Fix files left in build dir after distclean
- Fix CPU arch dependant RTE_CPUFLAG_ usage

Thanks William & Ian for review & help on v5.
All known issues are fixed and working here.


Hi All,

This patchset implements the changes as proposed during the
OVS Conf '19, in the talk "Next steps for SW Datapath".
Youtube link: https://youtu.be/x0bOpojnpmU

The talk raises 3 main requirements for CPU ISA Optimizations,
each of which is addressed in some of the patches below.
- Test & Validation (video @ 2:20)
- Usabiliity & Debug (video @ 6:00)
- Package & Deploy (video @ 8:45)

Patch 1/6:
The test and validation requirements proposed above are implemented,
with the refactor of the subtable function pointer registration,
and the autovalidator implementation is added.

Patch 2 & 3 / 6:
Adds the commands for usability & debug. Now improved with a "get" and
"set" command. Get returns current priorities and a list of each lookup
implementation. Set provides feedback to the user as to the number of
DPCLS ports/subtables that have new lookup functions due to the command
that was executed.

Patch 4/6:
Enable CPU ISA detection at runtime, providing information for future
ISA optimized functions.

Patch 5/6:
Actual AVX-512 implementation for DPCLS subtable search. This is the
actual SIMD vector code, which performs DPCLS miniflow iteration in
parallel.

Patch 6/6:
Add section in dpdk/bridges.rst on how to use the DPCLS commands, and
what they can be used for. Testing and validation using autovalidator
concept introduced, and command to set its priority is provided. The
steps required to run the autovalidator as default lookup implementation
are described. NEWS file is updated with things changed in userspace datapath.


Thanks for reading, any questions please let me know.
Regards, -Harry



Harry van Haaren (6):
  dpif-netdev: implement subtable lookup validation.
  dpif-netdev: add subtable lookup prio set command.
  dpif-netdev: add subtable-lookup-prio-get command.
  dpdk: enable cpu feature detection.
  dpif-lookup: add avx512 gather implementation.
  docs/dpdk/bridge: add datapath performance section.

 Documentation/topics/dpdk/bridge.rst   |  77 +++
 NEWS   |   3 +
 acinclude.m4   |  16 ++
 configure.ac   |   4 +
 lib/automake.mk|  24 +++
 lib/dpdk-stub.c|   9 +
 lib/dpdk.c |  30 +++
 lib/dpdk.h |   2 +
 lib/dpif-netdev-lookup-autovalidator.c | 110 ++
 lib/dpif-netdev-lookup-avx512-gather.c | 265 +
 lib/dpif-netdev-lookup-generic.c   |   9 +-
 lib/dpif-netdev-lookup.c   | 124 
 lib/dpif-netdev-lookup.h   |  79 
 lib/dpif-netdev-private.h  |  15 --
 lib/dpif-netdev.c  | 161 ++-
 m4/openvswitch.m4  |  30 +++
 16 files changed, 934 insertions(+), 24 deletions(-)
 create mode 100644 lib/dpif-netdev-lookup-autovalidator.c
 create mode 100644 lib/dpif-netdev-lookup-avx512-gather.c
 create mode 100644 lib/dpif-netdev-lookup.c
 create mode 100644 lib/dpif-netdev-lookup.h

-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 3/6] dpif-netdev: add subtable-lookup-prio-get command.

2020-07-02 Thread Harry van Haaren
This commit adds ia new command, "dpif-netdev/subtable-lookup-prio-get"
which prints the available sutable lookup functions in this OVS binary.
Example output from the command:

Available lookup functions (priority : name)
0 : autovalidator
1 : generic

Signed-off-by: Harry van Haaren 
Acked-by: William Tu 

---

v5:
- Add Ack from mailing list

v4:
- Add "prio" to name for consistency with "set" command (William Tu)
- Fix typo (William Tu)
---
 lib/dpif-netdev.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 758323a0e..da344106d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1295,6 +1295,30 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void
+dpif_netdev_subtable_lookup_get(struct unixctl_conn *conn, int argc OVS_UNUSED,
+const char *argv[] OVS_UNUSED,
+void *aux OVS_UNUSED)
+{
+/* Get a list of all lookup functions */
+struct dpcls_subtable_lookup_info_t *lookup_funcs = NULL;
+int32_t count = dpcls_subtable_lookup_info_get(_funcs);
+if (count < 0) {
+unixctl_command_reply_error(conn, "error getting lookup names");
+return;
+}
+
+/* Add all lookup functions to reply string */
+struct ds reply = DS_EMPTY_INITIALIZER;
+ds_put_cstr(, "Available lookup functions (priority : name)\n");
+for (int i = 0; i < count; i++) {
+ds_put_format(, "\t%d : %s\n", lookup_funcs[i].prio,
+  lookup_funcs[i].name);
+}
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static void
 dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc,
 const char *argv[], void *aux OVS_UNUSED)
@@ -1605,6 +1629,9 @@ dpif_netdev_init(void)
  "[lookup_func] [prio] [dp]",
  2, 3, dpif_netdev_subtable_lookup_set,
  NULL);
+unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "",
+ 0, 0, dpif_netdev_subtable_lookup_get,
+ NULL);
 return 0;
 }
 
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 6/6] docs/dpdk/bridge: add datapath performance section.

2020-07-02 Thread Harry van Haaren
This commit adds a section to the dpdk/bridge.rst netdev documentation,
detailing the added DPCLS functionality. The newly added commands are
documented, and sample output is provided.

Running the DPCLS autovalidator with unit tests by default is possible
through re-compiling the autovalidator to have the highest priority at
startup time. This avoids making changes to all tests, and enables
debug and CI builds to validate every lookup implementation with all
unit tests.

Add NEWS updates for CPU ISA, dynamic subtables, and AVX512 lookup.

Signed-off-by: Harry van Haaren 

---

v5:
- Include NEWS item updates.

v4:
- Fix typos (William Tu)
- Update get commands to use include "prio" as updated in v4
- Add section on enabling autovalidator by default for unit tests
---
 Documentation/topics/dpdk/bridge.rst | 77 
 NEWS |  3 ++
 2 files changed, 80 insertions(+)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index f0ef42ecc..526d5c959 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -137,3 +137,80 @@ currently turned off by default.
 To turn on SMC::
 
 $ ovs-vsctl --no-wait set Open_vSwitch . other_config:smc-enable=true
+
+Datapath Classifier Performance
+---
+
+The datapath classifier (dpcls) performs wildcard rule matching, a compute
+intensive process of matching a packet ``miniflow`` to a rule ``miniflow``. The
+code that does this compute work impacts datapath performance, and optimizing
+it can provide higher switching performance.
+
+Modern CPUs provide extensive SIMD instructions which can be used to get higher
+performance. The CPU OVS is being deployed on must be capable of running these
+SIMD instructions in order to take advantage of the performance benefits.
+In OVS v2.14 runtime CPU detection was introduced to enable identifying if
+these CPU ISA additions are available, and to allow the user to enable them.
+
+OVS provides multiple implementations of dpcls. The following command enables
+the user to check what implementations are available in a running instance ::
+
+$ ovs-appctl dpif-netdev/subtable-lookup-prio-get
+Available lookup functions (priority : name)
+0 : autovalidator
+1 : generic
+0 : avx512_gather
+
+To set the priority of a lookup function, run the ``prio-set`` command ::
+
+$ ovs-appctl dpif-netdev/subtable-lookup-prio-set avx512_gather 5
+Lookup priority change affected 1 dpcls ports and 1 subtables.
+
+The highest priority lookup function is used for classification, and the output
+above indicates that one subtable of one DPCLS port is has changed its lookup
+function due to the command being run. To verify the prioritization, re-run the
+get command, note the updated priority of the ``avx512_gather`` function ::
+
+$ ovs-appctl dpif-netdev/subtable-lookup-prio-get
+Available lookup functions (priority : name)
+0 : autovalidator
+1 : generic
+5 : avx512_gather
+
+If two lookup functions have the same priority, the first one in the list is
+chosen, and the 2nd occurance of that priority is not used. Put in logical
+terms, a subtable is chosen if its priority is greater than the previous
+best candidate.
+
+CPU ISA Testing and Validation
+~~
+
+As multiple versions of DPCLS can co-exist, each with different CPU ISA
+optimizations, it is important to validate that they all give the exact same
+results. To easily test all DPCLS implementations, an ``autovalidator``
+implementation of the DPCLS exists. This implementation runs all other
+available DPCLS implementations, and verifies that the results are identical.
+
+Running the OVS unit tests with the autovalidator enabled ensures all
+implementations provide the same results. Note that the performance of the
+autovalidator is lower than all other implementations, as it tests the scalar
+implementation against itself, and against all other enabled DPCLS
+implementations.
+
+To adjust the DPCLS autovalidator priority, use this command ::
+
+$ ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 7
+
+Running Unit Tests with Autovalidator
++
+
+To run the OVS unit test suite with the DPCLS autovalidator as the default
+implementation, it is required to recompile OVS. During the recompilation,
+the default priority of the `autovalidator` implementation is set to the
+maximum priority, ensuring every test will be run with every lookup
+implementation ::
+
+$ ./configure --enable-autovalidator
+
+Compile OVS in debug mode to have `ovs_assert` statements error out if
+there is a mis-match in the DPCLS lookup implementation.
diff --git a/NEWS b/NEWS
index 0116b3ea0..da8725b59 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,9 @@ Post-v2.13.0
  * New configuration knob 

[ovs-dev] [PATCH v6 4/6] dpdk: enable cpu feature detection.

2020-07-02 Thread Harry van Haaren
This commit implements a method to retrieve the CPU ISA capabilities.
These ISA capabilities can be used in OVS to at runtime select a function
implementation to make the best use of the available ISA on the CPU.

Signed-off-by: Harry van Haaren 

---

v6:
- Add #if around RTE_CPUFLAG_AVX512F as it is not defined on CPU
  architectures that don't support the ISA. This check fixes compilation
  on those CPUs, as previously it had an undefined RTE_CPUFLAG_* error.
  (Reported by Travis, thanks Ian & William for running checks)

v5:
- Change API to return bool, and return to true/false (William Tu)
- Change error log to VLOG_ERR_ONCE instead of manual impl. (William Tu)

v4:
- Improve commit title and message
---
 lib/dpdk-stub.c |  9 +
 lib/dpdk.c  | 30 ++
 lib/dpdk.h  |  2 ++
 3 files changed, 41 insertions(+)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217c..b7d577870 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -79,6 +79,15 @@ print_dpdk_version(void)
 {
 }
 
+bool
+dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED,
+ const char *feature OVS_UNUSED)
+{
+VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, "
+  "cannot use CPU flag based optimizations");
+return false;
+}
+
 void
 dpdk_status(const struct ovsrec_open_vswitch *cfg)
 {
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 31450d470..64a0d43e8 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -513,6 +514,35 @@ print_dpdk_version(void)
 puts(rte_version());
 }
 
+#define CHECK_CPU_FEATURE(feature, name_str, RTE_CPUFLAG)   \
+do {\
+if (strncmp(feature, name_str, strlen(name_str)) == 0) {\
+int has_isa = rte_cpu_get_flag_enabled(RTE_CPUFLAG);\
+VLOG_DBG("CPU flag %s, available %s\n", name_str,   \
+  has_isa ? "yes" : "no");  \
+return true;\
+}   \
+} while (0)
+
+bool
+dpdk_get_cpu_has_isa(const char *arch, const char *feature)
+{
+/* Ensure Arch is x86_64 */
+if (strncmp(arch, "x86_64", 6) != 0) {
+return false;
+}
+
+#if __x86_64__
+/* CPU flags only defined for the architecture that support it */
+CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F);
+CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2);
+#endif
+
+VLOG_WARN("Unknown CPU arch,feature: %s,%s. Returning not supported.\n",
+  arch, feature);
+return false;
+}
+
 void
 dpdk_status(const struct ovsrec_open_vswitch *cfg)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279..445a51d06 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -44,4 +44,6 @@ bool dpdk_per_port_memory(void);
 bool dpdk_available(void);
 void print_dpdk_version(void);
 void dpdk_status(const struct ovsrec_open_vswitch *);
+bool dpdk_get_cpu_has_isa(const char *arch, const char *feature);
+
 #endif /* dpdk.h */
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 5/6] dpif-lookup: add avx512 gather implementation.

2020-07-02 Thread Harry van Haaren
This commit adds an AVX-512 dpcls lookup implementation.
It uses the AVX-512 SIMD ISA to perform multiple miniflow
operations in parallel.

To run this implementation, the "avx512f" and "bmi2" ISAs are
required. These ISA checks are performed at runtime while
probing the subtable implementation. If a CPU does not provide
both "avx512f" and "bmi2", then this code does not execute.

The avx512 code is built as a seperate static library, with added
CFLAGS to enable the required ISA features. By building only this
static library with avx512 enabled, it is ensured that the main OVS
core library is *not* using avx512, and that OVS continues to run
as before on CPUs that do not support avx512.

The approach taken in this implementation is to use the
gather instruction to access the packet miniflow, allowing
any miniflow blocks to be loaded into an AVX-512 register.
This maximises the usefulness of the register, and hence this
implementation handles any subtable with up to miniflow 8 bits.

Note that specialization of these avx512 lookup routines
still provides performance value, as the hashing of the
resulting data is performed in scalar code, and compile-time
loop unrolling occurs when specialized to miniflow bits.

This commit checks at configure time if the assembling in use
has a known bug in assembling AVX512 code. If this bug is present,
all AVX512 code is disabled. Checking the version string of the binutils
or assembler is not a good method to detect the issue, as backported fixes
would not be reflected.

Signed-off-by: Harry van Haaren 

---

v6:
- Remove binutils probe .o file once used (Travis/Ian/William)
- Fix compilation/linking of avx512 library on --enable-shared when
  doing a make install-recursive (Travis/William/Ian)
- Fix "as unrecognized option --64" warning (Travis/William/Ian)

v5:
- Fixed typo equivelent/equivalent (William Tu)
- Fixed incorrect argument type uint64_t* to void* (Travis/William Tu)
  (Note: no functional change here - its still the same address :)
- Cleanup #ifdefs registering avx512 subtable lookup func (William Tu)
- Merged commit 6/7 from previous v4 to build avx512 "right" in one go
- Use mkdir -p to create build-aux/ dir before binutils check (Travis)

v4:
- Remove TODO comment on prio-set command (was accidentally
  added to this commit in v3)
- Fixup v3 changlog to not include #warning comment (William Tu)
- Remove #define for debugging in lookup.h
- Fix builds on older gcc versions that don't support -mavx512f.
  Solution involves CC_CHECK and #ifdefs in code (OVS Robot, William Tu)

v3:
- Improve function name for _any subtable lookup
- Use "" include not <> for immintrin.h
- Add checks for SSE42 instructions in core OVS for CRC32 based hashing
  If not available, disable AVX512 lookup implementation as it requires
  uses CRC32 for hashing, and the hashing algorithm must match core OVS.
- Rework ovs_asserts() into function selection time check
- Add #define for magic number 8, number of u64 blocks in AVX512 register
- Add #if CHECKER around AVX code, sparse doesn't like checking it
- Simplify avx512 enabled building, fixes builds with --enable-shared
---
 configure.ac   |   3 +
 lib/automake.mk|  21 ++
 lib/dpif-netdev-lookup-avx512-gather.c | 265 +
 lib/dpif-netdev-lookup.c   |  20 ++
 lib/dpif-netdev-lookup.h   |   4 +
 m4/openvswitch.m4  |  30 +++
 6 files changed, 343 insertions(+)
 create mode 100644 lib/dpif-netdev-lookup-avx512-gather.c

diff --git a/configure.ac b/configure.ac
index 81893e56e..76d6de4e8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -178,10 +178,13 @@ OVS_ENABLE_OPTION([-Wno-null-pointer-arithmetic])
 OVS_ENABLE_OPTION([-Warray-bounds-pointer-arithmetic])
 OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED])
 OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER])
+OVS_CONDITIONAL_CC_OPTION([-mavx512f], [HAVE_AVX512F])
+OVS_CHECK_CC_OPTION([-mavx512f], [CFLAGS="$CFLAGS -DHAVE_AVX512F"])
 OVS_ENABLE_WERROR
 OVS_ENABLE_SPARSE
 OVS_CTAGS_IDENTIFIERS
 OVS_CHECK_DPCLS_AUTOVALIDATOR
+OVS_CHECK_BINUTILS_AVX512
 
 AC_ARG_VAR(KARCH, [Kernel Architecture String])
 AC_SUBST(KARCH)
diff --git a/lib/automake.mk b/lib/automake.mk
index 1fc1a209e..eca448a5a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -11,6 +11,7 @@ lib_libopenvswitch_la_LIBADD = $(SSL_LIBS)
 lib_libopenvswitch_la_LIBADD += $(CAPNG_LDADD)
 lib_libopenvswitch_la_LIBADD += $(LIBBPF_LDADD)
 
+
 if WIN32
 lib_libopenvswitch_la_LIBADD += ${PTHREAD_LIBS}
 endif
@@ -20,6 +21,26 @@ lib_libopenvswitch_la_LDFLAGS = \
 -Wl,--version-script=$(top_builddir)/lib/libopenvswitch.sym \
 $(AM_LDFLAGS)
 
+if HAVE_AVX512F
+# Build library of avx512 code with CPU ISA CFLAGS enabled. This allows the
+# compiler to use the ISA features required for the ISA optimized code-paths.
+# Use LDFLAGS to compile only static library of this code, as 

[ovs-dev] [PATCH v6 1/6] dpif-netdev: implement subtable lookup validation.

2020-07-02 Thread Harry van Haaren
This commit refactors the existing dpif subtable function pointer
infrastructure, and implements an autovalidator component.

The refactoring of the existing dpcls subtable lookup function
handling, making it more generic, and cleaning up how to enable
more implementations in future.

In order to ensure all implementations provide identical results,
the autovalidator is added. The autovalidator itself implements
the subtable lookup function prototype, but internally iterates
over all other available implementations. The end result is that
testing of each implementation becomes automatic, when the auto-
validator implementation is selected.

Signed-off-by: Harry van Haaren 

---

v4:
- Fix automake .c file order (William Tu)
- Fix include file ordering: netdev-lookup before netdev-perf (William Tu)
- Fix Typos (William Tu)
- Add --enable-autovalidator compile time flag to set the DPCLS Autovalidator
  to the highest priority by default. This is required to run the unit tests
  with all DPCLS implementations without changing every test-case.
  Backwards compatibility is kept with OVS 2.13.
- Add check to ensure autovalidator is 0th item in lookup array
- Improve comments around autovalidator lookup iteration

v3:
- Fix compile error by adding errno.h include (William Tu)
- Improve vlog prints by using hex not int for bitmasks
- Update license years adding 2020
- Fix 0 used as NULL pointer
---
 acinclude.m4   |  16 
 configure.ac   |   1 +
 lib/automake.mk|   3 +
 lib/dpif-netdev-lookup-autovalidator.c | 110 +
 lib/dpif-netdev-lookup-generic.c   |   9 +-
 lib/dpif-netdev-lookup.c   | 104 +++
 lib/dpif-netdev-lookup.h   |  75 +
 lib/dpif-netdev-private.h  |  15 
 lib/dpif-netdev.c  |  13 ++-
 9 files changed, 322 insertions(+), 24 deletions(-)
 create mode 100644 lib/dpif-netdev-lookup-autovalidator.c
 create mode 100644 lib/dpif-netdev-lookup.c
 create mode 100644 lib/dpif-netdev-lookup.h

diff --git a/acinclude.m4 b/acinclude.m4
index 8847b8145..9b28222f6 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -14,6 +14,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+dnl Set OVS DPCLS Autovalidator as default subtable search at compile time?
+dnl This enables automatically running all unit tests with all DPCLS
+dnl implementations.
+AC_DEFUN([OVS_CHECK_DPCLS_AUTOVALIDATOR], [
+  AC_ARG_ENABLE([autovalidator],
+[AC_HELP_STRING([--enable-autovalidator], [Enable DPCLS 
autovalidator as default subtable search implementation.])],
+[autovalidator=yes],[autovalidator=no])
+  AC_MSG_CHECKING([whether DPCLS Autovalidator is default implementation])
+  if test "$autovalidator" != yes; then
+AC_MSG_RESULT([no])
+  else
+OVS_CFLAGS="$OVS_CFLAGS -DDPCLS_AUTOVALIDATOR_DEFAULT"
+AC_MSG_RESULT([yes])
+  fi
+])
+
 dnl OVS_ENABLE_WERROR
 AC_DEFUN([OVS_ENABLE_WERROR],
   [AC_ARG_ENABLE(
diff --git a/configure.ac b/configure.ac
index 1877aae56..81893e56e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -181,6 +181,7 @@ OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], 
[HAVE_WNO_UNUSED_PARAMETER])
 OVS_ENABLE_WERROR
 OVS_ENABLE_SPARSE
 OVS_CTAGS_IDENTIFIERS
+OVS_CHECK_DPCLS_AUTOVALIDATOR
 
 AC_ARG_VAR(KARCH, [Kernel Architecture String])
 AC_SUBST(KARCH)
diff --git a/lib/automake.mk b/lib/automake.mk
index 86940ccd2..1fc1a209e 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -81,6 +81,9 @@ lib_libopenvswitch_la_SOURCES = \
lib/dp-packet.h \
lib/dp-packet.c \
lib/dpdk.h \
+   lib/dpif-netdev-lookup.h \
+   lib/dpif-netdev-lookup.c \
+   lib/dpif-netdev-lookup-autovalidator.c \
lib/dpif-netdev-lookup-generic.c \
lib/dpif-netdev.c \
lib/dpif-netdev.h \
diff --git a/lib/dpif-netdev-lookup-autovalidator.c 
b/lib/dpif-netdev-lookup-autovalidator.c
new file mode 100644
index 0..a027321ab
--- /dev/null
+++ b/lib/dpif-netdev-lookup-autovalidator.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include "dpif-netdev.h"
+#include "dpif-netdev-lookup.h"
+#include "dpif-netdev-private.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_lookup_autovalidator);
+

[ovs-dev] [PATCH v6 2/6] dpif-netdev: add subtable lookup prio set command.

2020-07-02 Thread Harry van Haaren
This commit adds a command for the dpif-netdev to set a specific
lookup function to a particular priority level. The command enables
runtime switching of the dpcls subtable lookup implementation.

Selection is performed based on a priority. Higher priorities take
precedence, eg; priotity 5 will be selected instead of a priority 3.
If lookup functions have the same priority, the first one in the list
is selected.

The two options available are 'autovalidator' and 'generic'.
The below command will set a new priority for the given function:
$ ovs-appctl dpif-netdev/subtable-lookup-prio-set generic 2

The autovalidator implementation can be selected at runtime now:
$ ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 5

Signed-off-by: Harry van Haaren 
Acked-by: William Tu 

---

v5:
- Add Ack from mailing list
- Rebase to latest master (fixing command adding conflict)

v4:
- Align prio-set command in code (William Tu)
- Improve commit title & update commit message (William Tu)

v3
- Add automatic reprobe after changing priorities
--- Refactored from previous 1-second timeout based reprobe WIP-hack
- Add VLOG entries for changed dpcls and subtable counts
--- Also return the updated counts to the issuing command for visibility
- Clarify command by adding "prio" to the name
--- New command name is "dpif-netdev/subtable-lookup-prio-set"
--- Please note this new command change - previous command is now invalid
---
 lib/dpif-netdev.c | 121 ++
 1 file changed, 121 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0468064c9..758323a0e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -259,6 +259,7 @@ struct dp_packet_flow_map {
 static void dpcls_init(struct dpcls *);
 static void dpcls_destroy(struct dpcls *);
 static void dpcls_sort_subtable_vector(struct dpcls *);
+static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
 static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
  const struct netdev_flow_key *mask);
 static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
@@ -891,6 +892,9 @@ dpif_netdev_xps_revalidate_pmd(const struct 
dp_netdev_pmd_thread *pmd,
bool purge);
 static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
   struct tx_port *tx);
+static inline struct dpcls *
+dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
+   odp_port_t in_port);
 
 static inline bool emc_entry_alive(struct emc_entry *ce);
 static void emc_clear_entry(struct emc_entry *ce);
@@ -1291,6 +1295,97 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void
+dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc,
+const char *argv[], void *aux OVS_UNUSED)
+{
+/* This function requires 2 parameters (argv[1] and argv[2]) to execute.
+ *   argv[1] is subtable name
+ *   argv[2] is priority
+ *   argv[3] is the datapath name (optional if only 1 datapath exists)
+ */
+const char *func_name = argv[1];
+
+errno = 0;
+char *err_char;
+uint32_t new_prio = strtoul(argv[2], _char, 10);
+if (errno != 0 || new_prio > UINT8_MAX) {
+unixctl_command_reply_error(conn,
+"error converting priority, use integer in range 0-255\n");
+return;
+}
+
+int32_t err = dpcls_subtable_set_prio(func_name, new_prio);
+if (err) {
+unixctl_command_reply_error(conn,
+"error, subtable lookup function not found\n");
+return;
+}
+
+/* argv[3] is optional datapath instance. If no datapath name is provided
+ * and only one datapath exists, the one existing datapath is reprobed.
+ */
+ovs_mutex_lock(_netdev_mutex);
+struct dp_netdev *dp = NULL;
+
+if (argc == 4) {
+dp = shash_find_data(_netdevs, argv[3]);
+} else if (shash_count(_netdevs) == 1) {
+dp = shash_first(_netdevs)->data;
+}
+
+if (!dp) {
+ovs_mutex_unlock(_netdev_mutex);
+unixctl_command_reply_error(conn,
+"please specify an existing datapath");
+return;
+}
+
+/* Get PMD threads list, required to get DPCLS instances */
+size_t n;
+uint32_t lookup_dpcls_changed = 0;
+uint32_t lookup_subtable_changed = 0;
+struct dp_netdev_pmd_thread **pmd_list;
+sorted_poll_thread_list(dp, _list, );
+
+/* take port mutex as HMAP iters over them */
+ovs_mutex_lock(>port_mutex);
+
+for (size_t i = 0; i < n; i++) {
+struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+if (pmd->core_id == NON_PMD_CORE_ID) {
+continue;
+}
+
+struct dp_netdev_port *port = NULL;
+HMAP_FOR_EACH (port, node, >ports) {
+odp_port_t in_port = port->port_no;
+struct dpcls *cls = 

Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Ilya Maximets
On 7/2/20 5:25 PM, Pai G, Sunil wrote:
>>
>>> +
>>>  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
>>>  # Avoid using cache for git tree build.
>>>  rm -rf dpdk-dir
>>> @@ -108,7 +112,8 @@ function install_dpdk()
>>>  if [ -f "${VERSION_FILE}" ]; then
>>>  VER=$(cat ${VERSION_FILE})
>>>  if [ "${VER}" = "${DPDK_VER}" ]; then
>>> -EXTRA_OPTS="${EXTRA_OPTS} 
>>> --with-dpdk=$(pwd)/dpdk-dir/build"
>>> +sudo ninja -C $(pwd)/dpdk-dir/build install
>>> +sudo ldconfig
>>
>> I think that installing right here inside the cached folder and just 
>> adjusting
>> environment variables should be a bit faster than re-installing DPDK every 
>> time.
>>
>> This script also will be a good example for people like me, who really don't 
>> want
>> to install development versions of DPDK globally on a work laptop while 
>> testing
>> OVS builds.
> 
> Yes , Thanks for the suggestion. Although ,using an install to a directory 
> with
> a prefix would require this patch from Bruce: 
> https://patches.dpdk.org/patch/72271/
>  (which is not merged yet as of this writing) .without this , OVS would fail 
> to run 
> searching for few shared DPDK libraries even when built with static libraries.

But, IIUC, we could just add a path to ld.so.conf to avoid that.
Will it work?  You're updating ldconfig here anyway.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpdk: Refuse running on cpu >= RTE_MAX_LCORE.

2020-07-02 Thread David Marchand
On Thu, Jul 2, 2020 at 5:53 PM Ilya Maximets  wrote:
> > pmd threads will never run on any core >= RTE_MAX_LCORE ok, it works.
> >
> > Though, it binds OVS core id and DPDK lcore id.
> > Should OVS really care about this?
>
> I think this is better than random abort() in production environment.
> And, in current code, OVS cores bound to lcores as 1 to 1.

Yes, I did not like either the abort(), I added it wrt to a comment from Kevin.
But rereading it, I might have misunderstood.


>
> I understand that there will be no direct binding with the new
> rte_thread_register API, and we will need to figure out how to work
> with it correctly.  With new API above changes will need to be reverted
> and some new handling introduced to limit total number of cores, not
> their IDs.  Maybe something like this (not tested):

I just wanted to make sure you had the other patch in mind.
I'll have a try with your proposal.

Thanks.

-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Ilya Maximets
On 7/2/20 6:10 PM, Pai G, Sunil wrote:
>> -Original Message-
>> From: Richardson, Bruce 
>> Sent: Thursday, July 2, 2020 9:32 PM
>> To: Pai G, Sunil ; Ilya Maximets
>> ; d...@openvswitch.org
>> Cc: Stokes, Ian ; david.march...@redhat.com;
>> Tummala, Sivaprasad 
>> Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
>> build.
>>
>>
>>
>>> -Original Message-
>>> From: Pai G, Sunil 
>>> Sent: Thursday, July 2, 2020 4:25 PM
>>> To: Ilya Maximets ; d...@openvswitch.org
>>> Cc: Stokes, Ian ; david.march...@redhat.com;
>>> Richardson, Bruce ; Tummala, Sivaprasad
>>> 
>>> Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
>>> build.
>>>
 -Original Message-
 From: Ilya Maximets 
 Sent: Thursday, July 2, 2020 7:26 PM
 To: Pai G, Sunil ; d...@openvswitch.org
 Cc: Stokes, Ian ; i.maxim...@ovn.org;
 david.march...@redhat.com; Richardson, Bruce
 ; Tummala, Sivaprasad
 ; i.maxim...@ovn.org
 Subject: Re: [PATCH RFC dpdk-latest] build: Add support for DPDK
 meson
>>> build.

 On 7/2/20 3:13 PM, Sunil Pai G wrote:
> Make based build is deprecated in DPDK. Meson based build to be
> used for future DPDK releases.
>
> This updates travis, configure script and documentation for using
> DPDK Meson with OVS.
>
> Signed-off-by: Sunil Pai G 

 Thanks for working on this!
 Not a full review, just a few quick bits.

 At first, why dpdk-latest?  Is there issue with meson build on 19.11?
>>>
>>> The linker always picked the shared DPDK libraries over static when
>>> built with Meson in DPDK-19.11. -Bstatic flag would get jumbled by
>>> libtool causing this.
>>> Thanks to Bruce, there was recently merged series which fixed a bunch
>>> of issues along with this :
>>> https://patches.dpdk.org/project/dpdk/list/?series=10690
>>> It is requested for a back port of this series to DPDK-19.11.
>>>

 Few more comments inline.

 Best regards, Ilya Maximets.

> ---
>  .travis.yml   |  3 ++
>  .travis/linux-build.sh| 37 +---
>  .travis/linux-prepare.sh  |  1 +
>  Documentation/intro/install/afxdp.rst |  2 +-
> Documentation/intro/install/dpdk.rst  | 56 ---
>> -
>  Makefile.am   |  3 +-
>  acinclude.m4  | 42 --
>  parse_pkg_cfg.py  | 62
>>> +++
>  8 files changed, 167 insertions(+), 39 deletions(-)  create mode
> 100644 parse_pkg_cfg.py
>
> diff --git a/.travis.yml b/.travis.yml index 97249c1ce..46d7ad9bb
> 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -27,6 +27,9 @@ addons:
>- selinux-policy-dev
>- libunbound-dev
>- libunwind-dev
> +  - python3-setuptools
> +  - python3-wheel
> +  - ninja-build
>
>  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
>
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> 33b359a61..7fa7e738c 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -85,17 +85,21 @@ function install_dpdk()  {
>  local DPDK_VER=$1
>  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> +local DPDK_OPTS=""
>
> -if [ -z "$TRAVIS_ARCH" ] ||
> -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> -TARGET="x86_64-native-linuxapp-gcc"
> -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> -TARGET="arm64-armv8a-linuxapp-gcc"
> -else
> +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> +DPDK_OPTS="$DPDK_OPTS --cross-file
 config/arm/arm64_armv8_linux_gcc"

 We're not cross compiling, we're actually building on aarch64 here.
 This option should not be needed.  IIUC, meson should detect current
 architecture and build with appropriate configuration.

 We should be able to just drop all the TRAVIS_ARCH checks for DPDK
>> here.
>>>
>>> Sure ,let me check on this . I see a similar approach taken in DPDK
>>> travis.
>>> May be Bruce can comment as well ?
>>>

> +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ];
> + then
>  echo "Target is unknown"
>  exit 1
>  fi
>
> +if [ "$DPDK_SHARED" ]; then
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> +else
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> +fi

 I gusee we could just change the env variable and not parse it here.
 i.e. have DPDK_LIB=static defined by travis.yml by default and
 redefine it for matrix entries where we need shared libs.
>>>
>>> Yes , this could be done. But if we install the libraries in a custom
>>> path using a prefix, we would have to export the LD_LIBRARY_PATH for
>>> 

Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Pai G, Sunil
> -Original Message-
> From: Richardson, Bruce 
> Sent: Thursday, July 2, 2020 9:32 PM
> To: Pai G, Sunil ; Ilya Maximets
> ; d...@openvswitch.org
> Cc: Stokes, Ian ; david.march...@redhat.com;
> Tummala, Sivaprasad 
> Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
> build.
> 
> 
> 
> > -Original Message-
> > From: Pai G, Sunil 
> > Sent: Thursday, July 2, 2020 4:25 PM
> > To: Ilya Maximets ; d...@openvswitch.org
> > Cc: Stokes, Ian ; david.march...@redhat.com;
> > Richardson, Bruce ; Tummala, Sivaprasad
> > 
> > Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
> > build.
> >
> > > -Original Message-
> > > From: Ilya Maximets 
> > > Sent: Thursday, July 2, 2020 7:26 PM
> > > To: Pai G, Sunil ; d...@openvswitch.org
> > > Cc: Stokes, Ian ; i.maxim...@ovn.org;
> > > david.march...@redhat.com; Richardson, Bruce
> > > ; Tummala, Sivaprasad
> > > ; i.maxim...@ovn.org
> > > Subject: Re: [PATCH RFC dpdk-latest] build: Add support for DPDK
> > > meson
> > build.
> > >
> > > On 7/2/20 3:13 PM, Sunil Pai G wrote:
> > > > Make based build is deprecated in DPDK. Meson based build to be
> > > > used for future DPDK releases.
> > > >
> > > > This updates travis, configure script and documentation for using
> > > > DPDK Meson with OVS.
> > > >
> > > > Signed-off-by: Sunil Pai G 
> > >
> > > Thanks for working on this!
> > > Not a full review, just a few quick bits.
> > >
> > > At first, why dpdk-latest?  Is there issue with meson build on 19.11?
> >
> > The linker always picked the shared DPDK libraries over static when
> > built with Meson in DPDK-19.11. -Bstatic flag would get jumbled by
> > libtool causing this.
> > Thanks to Bruce, there was recently merged series which fixed a bunch
> > of issues along with this :
> > https://patches.dpdk.org/project/dpdk/list/?series=10690
> > It is requested for a back port of this series to DPDK-19.11.
> >
> > >
> > > Few more comments inline.
> > >
> > > Best regards, Ilya Maximets.
> > >
> > > > ---
> > > >  .travis.yml   |  3 ++
> > > >  .travis/linux-build.sh| 37 +---
> > > >  .travis/linux-prepare.sh  |  1 +
> > > >  Documentation/intro/install/afxdp.rst |  2 +-
> > > > Documentation/intro/install/dpdk.rst  | 56 ---
> -
> > > >  Makefile.am   |  3 +-
> > > >  acinclude.m4  | 42 --
> > > >  parse_pkg_cfg.py  | 62
> > +++
> > > >  8 files changed, 167 insertions(+), 39 deletions(-)  create mode
> > > > 100644 parse_pkg_cfg.py
> > > >
> > > > diff --git a/.travis.yml b/.travis.yml index 97249c1ce..46d7ad9bb
> > > > 100644
> > > > --- a/.travis.yml
> > > > +++ b/.travis.yml
> > > > @@ -27,6 +27,9 @@ addons:
> > > >- selinux-policy-dev
> > > >- libunbound-dev
> > > >- libunwind-dev
> > > > +  - python3-setuptools
> > > > +  - python3-wheel
> > > > +  - ninja-build
> > > >
> > > >  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> > > >
> > > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > > > 33b359a61..7fa7e738c 100755
> > > > --- a/.travis/linux-build.sh
> > > > +++ b/.travis/linux-build.sh
> > > > @@ -85,17 +85,21 @@ function install_dpdk()  {
> > > >  local DPDK_VER=$1
> > > >  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> > > > +local DPDK_OPTS=""
> > > >
> > > > -if [ -z "$TRAVIS_ARCH" ] ||
> > > > -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> > > > -TARGET="x86_64-native-linuxapp-gcc"
> > > > -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > > > -TARGET="arm64-armv8a-linuxapp-gcc"
> > > > -else
> > > > +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > > > +DPDK_OPTS="$DPDK_OPTS --cross-file
> > > config/arm/arm64_armv8_linux_gcc"
> > >
> > > We're not cross compiling, we're actually building on aarch64 here.
> > > This option should not be needed.  IIUC, meson should detect current
> > > architecture and build with appropriate configuration.
> > >
> > > We should be able to just drop all the TRAVIS_ARCH checks for DPDK
> here.
> >
> > Sure ,let me check on this . I see a similar approach taken in DPDK
> > travis.
> > May be Bruce can comment as well ?
> >
> > >
> > > > +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ];
> > > > + then
> > > >  echo "Target is unknown"
> > > >  exit 1
> > > >  fi
> > > >
> > > > +if [ "$DPDK_SHARED" ]; then
> > > > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> > > > +else
> > > > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> > > > +fi
> > >
> > > I gusee we could just change the env variable and not parse it here.
> > > i.e. have DPDK_LIB=static defined by travis.yml by default and
> > > redefine it for matrix entries where we need shared libs.
> >
> > Yes , this could be done. But if we install 

Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Richardson, Bruce



> -Original Message-
> From: Pai G, Sunil 
> Sent: Thursday, July 2, 2020 4:25 PM
> To: Ilya Maximets ; d...@openvswitch.org
> Cc: Stokes, Ian ; david.march...@redhat.com;
> Richardson, Bruce ; Tummala, Sivaprasad
> 
> Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
> build.
> 
> > -Original Message-
> > From: Ilya Maximets 
> > Sent: Thursday, July 2, 2020 7:26 PM
> > To: Pai G, Sunil ; d...@openvswitch.org
> > Cc: Stokes, Ian ; i.maxim...@ovn.org;
> > david.march...@redhat.com; Richardson, Bruce
> > ; Tummala, Sivaprasad
> > ; i.maxim...@ovn.org
> > Subject: Re: [PATCH RFC dpdk-latest] build: Add support for DPDK meson
> build.
> >
> > On 7/2/20 3:13 PM, Sunil Pai G wrote:
> > > Make based build is deprecated in DPDK. Meson based build to be used
> > > for future DPDK releases.
> > >
> > > This updates travis, configure script and documentation for using
> > > DPDK Meson with OVS.
> > >
> > > Signed-off-by: Sunil Pai G 
> >
> > Thanks for working on this!
> > Not a full review, just a few quick bits.
> >
> > At first, why dpdk-latest?  Is there issue with meson build on 19.11?
> 
> The linker always picked the shared DPDK libraries over static when built
> with Meson in DPDK-19.11. -Bstatic flag would get jumbled by libtool
> causing this.
> Thanks to Bruce, there was recently merged series which fixed a bunch of
> issues along with this :
> https://patches.dpdk.org/project/dpdk/list/?series=10690
> It is requested for a back port of this series to DPDK-19.11.
> 
> >
> > Few more comments inline.
> >
> > Best regards, Ilya Maximets.
> >
> > > ---
> > >  .travis.yml   |  3 ++
> > >  .travis/linux-build.sh| 37 +---
> > >  .travis/linux-prepare.sh  |  1 +
> > >  Documentation/intro/install/afxdp.rst |  2 +-
> > > Documentation/intro/install/dpdk.rst  | 56 
> > >  Makefile.am   |  3 +-
> > >  acinclude.m4  | 42 --
> > >  parse_pkg_cfg.py  | 62
> +++
> > >  8 files changed, 167 insertions(+), 39 deletions(-)  create mode
> > > 100644 parse_pkg_cfg.py
> > >
> > > diff --git a/.travis.yml b/.travis.yml index 97249c1ce..46d7ad9bb
> > > 100644
> > > --- a/.travis.yml
> > > +++ b/.travis.yml
> > > @@ -27,6 +27,9 @@ addons:
> > >- selinux-policy-dev
> > >- libunbound-dev
> > >- libunwind-dev
> > > +  - python3-setuptools
> > > +  - python3-wheel
> > > +  - ninja-build
> > >
> > >  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> > >
> > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > > 33b359a61..7fa7e738c 100755
> > > --- a/.travis/linux-build.sh
> > > +++ b/.travis/linux-build.sh
> > > @@ -85,17 +85,21 @@ function install_dpdk()  {
> > >  local DPDK_VER=$1
> > >  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> > > +local DPDK_OPTS=""
> > >
> > > -if [ -z "$TRAVIS_ARCH" ] ||
> > > -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> > > -TARGET="x86_64-native-linuxapp-gcc"
> > > -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > > -TARGET="arm64-armv8a-linuxapp-gcc"
> > > -else
> > > +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > > +DPDK_OPTS="$DPDK_OPTS --cross-file
> > config/arm/arm64_armv8_linux_gcc"
> >
> > We're not cross compiling, we're actually building on aarch64 here.
> > This option should not be needed.  IIUC, meson should detect current
> > architecture and build with appropriate configuration.
> >
> > We should be able to just drop all the TRAVIS_ARCH checks for DPDK here.
> 
> Sure ,let me check on this . I see a similar approach taken in DPDK
> travis.
> May be Bruce can comment as well ?
> 
> >
> > > +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ];
> > > + then
> > >  echo "Target is unknown"
> > >  exit 1
> > >  fi
> > >
> > > +if [ "$DPDK_SHARED" ]; then
> > > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> > > +else
> > > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> > > +fi
> >
> > I gusee we could just change the env variable and not parse it here.
> > i.e. have DPDK_LIB=static defined by travis.yml by default and
> > redefine it for matrix entries where we need shared libs.
> 
> Yes , this could be done. But if we install the libraries in a custom path
> using a prefix, we would have to export the LD_LIBRARY_PATH for which
> querying whether to build as shared/static will be required.(This in
> relation to the below comment as well :))
> 
> 
> >
> > > +
> > >  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
> > >  # Avoid using cache for git tree build.
> > >  rm -rf dpdk-dir
> > > @@ -108,7 +112,8 @@ function install_dpdk()
> > >  if [ -f "${VERSION_FILE}" ]; then
> > >  VER=$(cat ${VERSION_FILE})
> > >  if [ "${VER}" 

Re: [ovs-dev] [PATCH] dpdk: Refuse running on cpu >= RTE_MAX_LCORE.

2020-07-02 Thread Ilya Maximets
On 7/2/20 3:56 PM, David Marchand wrote:
> On Thu, Jul 2, 2020 at 3:09 PM Ilya Maximets  wrote:
>>
>> On 6/26/20 2:27 PM, David Marchand wrote:
>>> DPDK lcores range is [0, RTE_MAX_LCORE[.
>>> Trying to use a lcore out of this range expose OVS to crashes in DPDK 
>>> mempool
>>> local cache array.
>>> Prefer a "clean" abort so that users know that something is wrong with
>>> their configuration.
>>>
>>> Signed-off-by: David Marchand 
>>> ---
>>>  lib/dpdk.c | 7 +++
>>>  1 file changed, 7 insertions(+)
>>>
>>> diff --git a/lib/dpdk.c b/lib/dpdk.c
>>> index 98d0e2643e..55ce9a9221 100644
>>> --- a/lib/dpdk.c
>>> +++ b/lib/dpdk.c
>>> @@ -617,7 +617,14 @@ dpdk_set_lcore_id(unsigned cpu)
>>>  {
>>>  /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
>>>  ovs_assert(cpu != NON_PMD_CORE_ID);
>>> +if (cpu >= RTE_MAX_LCORE) {
>>> +cpu = LCORE_ID_ANY;
>>> +}
>>>  RTE_PER_LCORE(_lcore_id) = cpu;
>>> +if (rte_lcore_id() == LCORE_ID_ANY) {
>>> +ovs_abort(0, "PMD thread init failed, trying to use more cores 
>>> than "
>>> +  "DPDK supports (RTE_MAX_LCORE %u).", RTE_MAX_LCORE);
>>
>> I don't think that random OVS abort after pmd-cpu-mask change or port 
>> addition
>> is a good thing to have.  Maybe it's better to limit CPU core discovery?
>> This way OVS will never try to create thread on unwanted cores.
>>
>> Since ovs-numa module is mostly used by userspace datapath this will not
>> affect non-DPDK setups.
>>
>> Something like this (not tested):
>>
>> diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
>> index 6d0a68522..7c8a3b036 100644
>> --- a/lib/ovs-numa.c
>> +++ b/lib/ovs-numa.c
>> @@ -27,6 +27,7 @@
>>  #include 
>>  #endif /* __linux__ */
>>
>> +#include "dpdk.h"
>>  #include "hash.h"
>>  #include "openvswitch/hmap.h"
>>  #include "openvswitch/list.h"
>> @@ -58,6 +59,13 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
>>
>>  #define MAX_NUMA_NODES 128
>>
>> +#ifdef DPDK_NETDEV
>> +/* DPDK will not work correctly for cores with core_id >= RTE_MAX_LCORE. */
>> +#define MAX_CORE_ID(RTE_MAX_LCORE - 1)
>> +#else
>> +#define MAX_CORE_IDINT_MAX
>> +#endif
>> +
>>  /* numa node. */
>>  struct numa_node {
>>  struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
>> @@ -112,6 +120,12 @@ insert_new_numa_node(int numa_id)
>>  static struct cpu_core *
>>  insert_new_cpu_core(struct numa_node *n, unsigned core_id)
>>  {
>> +if (core_id > MAX_CORE_ID) {
>> +VLOG_DBG("Ignoring CPU core with id %u ( %u > %d).",
>> + core_id, core_id, MAX_CORE_ID);
>> +return NULL;
>> +}
>> +
>>  struct cpu_core *c = xzalloc(sizeof *c);
>>
>>  hmap_insert(_cpu_cores, >hmap_node, hash_int(core_id, 0));
>> @@ -278,6 +292,11 @@ ovs_numa_init(void)
>>  if (ovsthread_once_start()) {
>>  const struct numa_node *n;
>>
>> +if (MAX_CORE_ID != INT_MAX) {
>> +VLOG_INFO("Maximum allowed CPU core id is %d. "
>> +  "Other cores will not be available.", MAX_CORE_ID);
>> +}
>> +
>>  if (dummy_numa) {
>>  discover_numa_and_core_dummy();
>>  } else {
>> ---
>>
>> What do you think?
> 
> pmd threads will never run on any core >= RTE_MAX_LCORE ok, it works.
> 
> Though, it binds OVS core id and DPDK lcore id.
> Should OVS really care about this?

I think this is better than random abort() in production environment.
And, in current code, OVS cores bound to lcores as 1 to 1.

I understand that there will be no direct binding with the new
rte_thread_register API, and we will need to figure out how to work
with it correctly.  With new API above changes will need to be reverted
and some new handling introduced to limit total number of cores, not
their IDs.  Maybe something like this (not tested):

diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279..f738a90f6 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -25,10 +25,12 @@
 #include 
 
 #define NON_PMD_CORE_ID LCORE_ID_ANY
+#define MAX_PMD_THREADS RTE_MAX_LCORE
 
 #else
 
 #define NON_PMD_CORE_ID UINT32_MAX
+#define MAX_PMD_THREADS INT_MAX
 
 #endif /* DPDK_NETDEV */
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1086efd47..245c9b2de 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4906,18 +4906,28 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
 struct ovs_numa_info_core *core;
 struct hmapx to_delete = HMAPX_INITIALIZER(_delete);
 struct hmapx_node *node;
-bool changed = false;
+bool limited, changed = false;
 bool need_to_adjust_static_tx_qids = false;
 
 /* The pmd threads should be started only if there's a pmd port in the
  * datapath.  If the user didn't provide any "pmd-cpu-mask", we start
  * NR_PMD_THREADS per numa node. */
 if (!has_pmd_port(dp)) {
-pmd_cores = ovs_numa_dump_n_cores_per_numa(0);
+pmd_cores = ovs_numa_dump_n_cores_per_numa(0, INT_MAX, );
 } else if (dp->pmd_cmask && dp->pmd_cmask[0]) {
-pmd_cores = 

Re: [ovs-dev] [PATCH] dpdk: Add commands to configure log levels.

2020-07-02 Thread David Marchand
On Thu, Jul 2, 2020 at 12:04 PM Ilya Maximets  wrote:
>
> On 7/2/20 10:25 AM, David Marchand wrote:
> > On Thu, Jul 2, 2020 at 12:05 AM Ilya Maximets  wrote:
> >> On 6/26/20 2:27 PM, David Marchand wrote:
> >>> Enabling debug logs in dpdk can be a challenge to be sure of what is
> >>> actually enabled, add commands to list and change those log levels.
> >>
> >> Could you, please, provide some usage examples?
> >> Maybe add some documentation about these commands.  And a NEWS update
> >> since this is a user visible change.
> >
> > In the documentation, example outputs could get outdated since we
> > directly pass dpdk output (/me still wants to shoot the dpdk global
> > log level that is just useless).
> > But I think it is still worth adding.
> >
> > I would see either in Documentation/howto/dpdk.rst or
> > Documentation/intro/install/dpdk.rst.
> > The latter seems the best fit: we have setup instructions, configuring
> > logs is close.
> > WDYT?
>
> install/dpdk.rst might be more appropriate.
> However, maybe it's enough to just create lib/dpdk-unixctl.man similar
> to lib/netdev-dpdk-unixctl.man ?  We need it anyway to keep man pages
> updated.

Indeed, I had missed this.
Thanks, will do.


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Pai G, Sunil
> -Original Message-
> From: Richardson, Bruce 
> Sent: Thursday, July 2, 2020 7:29 PM
> To: Pai G, Sunil ; d...@openvswitch.org
> Cc: Stokes, Ian ; i.maxim...@ovn.org;
> david.march...@redhat.com; Tummala, Sivaprasad
> 
> Subject: RE: [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.
> 
> 
> 
> > -Original Message-
> > From: Pai G, Sunil 
> > Sent: Thursday, July 2, 2020 2:14 PM
> > To: d...@openvswitch.org
> > Cc: Stokes, Ian ; i.maxim...@ovn.org;
> > david.march...@redhat.com; Richardson, Bruce
> > ; Pai G, Sunil ;
> > Tummala, Sivaprasad 
> > Subject: [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.
> >
> > Make based build is deprecated in DPDK. Meson based build to be used
> > for future DPDK releases.
> >
> > This updates travis, configure script and documentation for using DPDK
> > Meson with OVS.
> >
> > Signed-off-by: Sunil Pai G 
> 
> Thanks for this, a couple of comments from the DPDK build side inline below.
> 
> Regards,
> /Bruce
> 
> > ---
> >  .travis.yml   |  3 ++
> >  .travis/linux-build.sh| 37 +---
> >  .travis/linux-prepare.sh  |  1 +
> >  Documentation/intro/install/afxdp.rst |  2 +-
> > Documentation/intro/install/dpdk.rst  | 56 
> >  Makefile.am   |  3 +-
> >  acinclude.m4  | 42 --
> >  parse_pkg_cfg.py  | 62 +++
> >  8 files changed, 167 insertions(+), 39 deletions(-)  create mode
> > 100644 parse_pkg_cfg.py
> >
> > diff --git a/.travis.yml b/.travis.yml index 97249c1ce..46d7ad9bb
> > 100644
> > --- a/.travis.yml
> > +++ b/.travis.yml
> > @@ -27,6 +27,9 @@ addons:
> >- selinux-policy-dev
> >- libunbound-dev
> >- libunwind-dev
> > +  - python3-setuptools
> > +  - python3-wheel
> > +  - ninja-build
> >
> >  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> >
> > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > 33b359a61..7fa7e738c 100755
> > --- a/.travis/linux-build.sh
> > +++ b/.travis/linux-build.sh
> > @@ -85,17 +85,21 @@ function install_dpdk()  {
> >  local DPDK_VER=$1
> >  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> > +local DPDK_OPTS=""
> >
> > -if [ -z "$TRAVIS_ARCH" ] ||
> > -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> > -TARGET="x86_64-native-linuxapp-gcc"
> > -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > -TARGET="arm64-armv8a-linuxapp-gcc"
> > -else
> > +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > +DPDK_OPTS="$DPDK_OPTS --cross-file
> > config/arm/arm64_armv8_linux_gcc"
> > +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ]; then
> >  echo "Target is unknown"
> >  exit 1
> >  fi
> >
> > +if [ "$DPDK_SHARED" ]; then
> > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> > +else
> > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> > +fi
> > +
> >  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
> >  # Avoid using cache for git tree build.
> >  rm -rf dpdk-dir
> > @@ -108,7 +112,8 @@ function install_dpdk()
> >  if [ -f "${VERSION_FILE}" ]; then
> >  VER=$(cat ${VERSION_FILE})
> >  if [ "${VER}" = "${DPDK_VER}" ]; then
> > -EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-
> > dir/build"
> > +sudo ninja -C $(pwd)/dpdk-dir/build install
> > +sudo ldconfig
> >  echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir"
> >  return
> >  fi
> > @@ -122,16 +127,18 @@ function install_dpdk()
> >  pushd dpdk-dir
> >  fi
> >
> > -make config CC=gcc T=$TARGET
> > +# Disable building DPDK kernel modules. Not needed for OVS build
> > + or
> > tests.
> > +DPDK_OPTS="$DPDK_OPTS -Denable_kmods=false"
> >
> 
> NOTE: enable_kmods=false is the default in DPDK, so should not need to be
> specified, unless you want to be doubly sure they are not build, or for 
> clarity.

Understood. Shall address this in the next version.

> 
> > -if [ "$DPDK_SHARED" ]; then
> > -sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config
> > -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib
> > -fi
> > +DPDK_OPTS="$DPDK_OPTS -Dc_args=-fPIC"
> 
> Since meson build for DPDK always links the objects into both static and 
> shared
> libs, all objects are already built with -fPIC, so this should not be 
> necessary.

Sure , will address this in the next patch as well.

> > +CC=gcc meson $DPDK_OPTS build
> > +ninja -C build
> > +sudo ninja -C build install
> > +
> > +# Update the library paths.
> > +sudo ldconfig
> >
> > -make -j4 CC=gcc EXTRA_CFLAGS='-fPIC'
> > -EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build"
> > -echo "Installed DPDK source in 

Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Pai G, Sunil
> -Original Message-
> From: Ilya Maximets 
> Sent: Thursday, July 2, 2020 7:26 PM
> To: Pai G, Sunil ; d...@openvswitch.org
> Cc: Stokes, Ian ; i.maxim...@ovn.org;
> david.march...@redhat.com; Richardson, Bruce
> ; Tummala, Sivaprasad
> ; i.maxim...@ovn.org
> Subject: Re: [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.
> 
> On 7/2/20 3:13 PM, Sunil Pai G wrote:
> > Make based build is deprecated in DPDK. Meson based build to be used
> > for future DPDK releases.
> >
> > This updates travis, configure script and documentation for using DPDK
> > Meson with OVS.
> >
> > Signed-off-by: Sunil Pai G 
> 
> Thanks for working on this!
> Not a full review, just a few quick bits.
> 
> At first, why dpdk-latest?  Is there issue with meson build on 19.11?

The linker always picked the shared DPDK libraries over static when built with 
Meson
in DPDK-19.11. -Bstatic flag would get jumbled by libtool causing this.
Thanks to Bruce, there was recently merged series which fixed a bunch of issues 
along with this :
https://patches.dpdk.org/project/dpdk/list/?series=10690 
It is requested for a back port of this series to DPDK-19.11.
 
> 
> Few more comments inline.
> 
> Best regards, Ilya Maximets.
> 
> > ---
> >  .travis.yml   |  3 ++
> >  .travis/linux-build.sh| 37 +---
> >  .travis/linux-prepare.sh  |  1 +
> >  Documentation/intro/install/afxdp.rst |  2 +-
> > Documentation/intro/install/dpdk.rst  | 56 
> >  Makefile.am   |  3 +-
> >  acinclude.m4  | 42 --
> >  parse_pkg_cfg.py  | 62 +++
> >  8 files changed, 167 insertions(+), 39 deletions(-)  create mode
> > 100644 parse_pkg_cfg.py
> >
> > diff --git a/.travis.yml b/.travis.yml index 97249c1ce..46d7ad9bb
> > 100644
> > --- a/.travis.yml
> > +++ b/.travis.yml
> > @@ -27,6 +27,9 @@ addons:
> >- selinux-policy-dev
> >- libunbound-dev
> >- libunwind-dev
> > +  - python3-setuptools
> > +  - python3-wheel
> > +  - ninja-build
> >
> >  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> >
> > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > 33b359a61..7fa7e738c 100755
> > --- a/.travis/linux-build.sh
> > +++ b/.travis/linux-build.sh
> > @@ -85,17 +85,21 @@ function install_dpdk()  {
> >  local DPDK_VER=$1
> >  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> > +local DPDK_OPTS=""
> >
> > -if [ -z "$TRAVIS_ARCH" ] ||
> > -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> > -TARGET="x86_64-native-linuxapp-gcc"
> > -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > -TARGET="arm64-armv8a-linuxapp-gcc"
> > -else
> > +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> > +DPDK_OPTS="$DPDK_OPTS --cross-file
> config/arm/arm64_armv8_linux_gcc"
> 
> We're not cross compiling, we're actually building on aarch64 here.
> This option should not be needed.  IIUC, meson should detect current
> architecture and build with appropriate configuration.
> 
> We should be able to just drop all the TRAVIS_ARCH checks for DPDK here.

Sure ,let me check on this . I see a similar approach taken in DPDK travis. 
May be Bruce can comment as well ?

> 
> > +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ]; then
> >  echo "Target is unknown"
> >  exit 1
> >  fi
> >
> > +if [ "$DPDK_SHARED" ]; then
> > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> > +else
> > +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> > +fi
> 
> I gusee we could just change the env variable and not parse it here. i.e. have
> DPDK_LIB=static defined by travis.yml by default and redefine it for matrix
> entries where we need shared libs.

Yes , this could be done. But if we install the libraries in a custom path 
using a prefix, 
we would have to export the LD_LIBRARY_PATH for which querying whether to build 
as shared/static will be required.(This in relation to the below comment as 
well :))


> 
> > +
> >  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
> >  # Avoid using cache for git tree build.
> >  rm -rf dpdk-dir
> > @@ -108,7 +112,8 @@ function install_dpdk()
> >  if [ -f "${VERSION_FILE}" ]; then
> >  VER=$(cat ${VERSION_FILE})
> >  if [ "${VER}" = "${DPDK_VER}" ]; then
> > -EXTRA_OPTS="${EXTRA_OPTS} 
> > --with-dpdk=$(pwd)/dpdk-dir/build"
> > +sudo ninja -C $(pwd)/dpdk-dir/build install
> > +sudo ldconfig
> 
> I think that installing right here inside the cached folder and just adjusting
> environment variables should be a bit faster than re-installing DPDK every 
> time.
> 
> This script also will be a good example for people like me, who really don't 
> want
> to install development versions of DPDK globally on a work laptop while 
> 

Re: [ovs-dev] [PATCH v5 0/6] DPCLS Subtable ISA Optimization

2020-07-02 Thread Van Haaren, Harry
> -Original Message-
> From: William Tu 
> Sent: Wednesday, July 1, 2020 10:53 PM
> To: Van Haaren, Harry 
> Cc: ovs-dev ; Stokes, Ian ; 
> Ilya
> Maximets ; Federico Iezzi 
> Subject: Re: [PATCH v5 0/6] DPCLS Subtable ISA Optimization
> 
> On Wed, Jul 1, 2020 at 7:22 AM Harry van Haaren
>  wrote:
> >
> > v5 work done:
> > - Add NEWS section
> > - Integrate v4 patches 5 and 6 into a single commit
> > - Address v4 feedback (details in each patch)
> > -- Fixed multiple travis build issues/warnings
> > -- Improved APIs/code, refactored #ifdefs
> >
> > @William Tu:
> > Thanks for pushing v4 commits to travis for testing, and reporting back.
> > There are 2 items that I could not reproduce, and could not reliably
> > state they are real issues, or 2nd order issues. This v5 has addressed
> > the actual errors from Travis output, there is a chance these issues
> > are not resolved by the surrounding fixes:
> > 1) RTE_CPUFLAG_AVX512F flag not defined in a DPDK=1 build?
> > 2) Binutils check for as with --64 parameter is invalid on older as?
> >
> > Would you re-push this branch and we can verify? Thanks in advance!
> Hi Harry,
> This is the v5 on travis.
> https://travis-ci.org/github/williamtu/ovs-travis/builds/704102981
> William

Thanks William!

CPUFLAG was a cross-build issue, resolved with a #ifdef __x86_64__
Binutils check has been fixed with a simple rm during configure time

Working on the remaining 2 known issues, and  will push a v6 ASAP once resolved.

You asked about the "make check-system-userspace" command on the v4,
I see this failing on ovs-master, and (still) failing with the AVX512 patchset 
applied.

The development of AVX512 work was done against "make check" and the datapath
tests there (combined with DPCLS Autovalidator) they provided value in detecting
real issues during development. I've run an LCOV report on the make check 
unit-tests,
and branch/line coverage of the AVX512 work is very high with Autovalidator 
enabled.

Regards, and v6 soon. -Harry
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 ovn 3/5] ovn-northd: Refactor ARP/NS responder in router pipeline.

2020-07-02 Thread Dumitru Ceara
Add functions to build the ARP/NS responder flows for table
S_ROUTER_IN_IP_INPUT and use them in all places where responder
flows are created.

Signed-off-by: Dumitru Ceara 
---
 northd/ovn-northd.8.xml |8 +
 northd/ovn-northd.c |  314 +--
 tests/ovn-northd.at |   72 +--
 3 files changed, 181 insertions(+), 213 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 78e2a71..84224ff 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1778,7 +1778,7 @@ arp.tha = arp.sha;
 arp.sha = xreg0[0..47];
 arp.tpa = arp.spa;
 arp.spa = A;
-outport = P;
+outport = inport;
 flags.loopback = 1;
 output;
 
@@ -1870,7 +1870,7 @@ arp.tha = arp.sha;
 arp.sha = xreg0[0..47];
 arp.tpa = arp.spa;
 arp.spa = A;
-outport = P;
+outport = inport;
 flags.loopback = 1;
 output;
 
@@ -1900,7 +1900,7 @@ nd_na {
 nd.tll = xreg0[0..47];
 ip6.src = A;
 nd.target = A;
-outport = P;
+outport = inport;
 flags.loopback = 1;
 output;
 }
@@ -2570,7 +2570,7 @@ reg8[0..15] = 0;
 xxreg0 = G;
 xxreg1 = A;
 eth.src = E;
-outport = P;
+outport = inport;
 flags.loopback = 1;
 next;
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 7c92436..e270596 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -7976,6 +7976,105 @@ lrouter_nat_is_stateless(const struct nbrec_nat *nat)
 return false;
 }
 
+/* Builds the logical flow that replies to ARP requests for an 'ip_address'
+ * owned by the router. The flow is inserted in table S_ROUTER_IN_IP_INPUT
+ * with the given priority.
+ */
+static void
+build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
+   const char *ip_address, const char *eth_addr,
+   struct ds *extra_match, uint16_t priority,
+   struct hmap *lflows, const struct ovsdb_idl_row *hint)
+{
+struct ds match = DS_EMPTY_INITIALIZER;
+struct ds actions = DS_EMPTY_INITIALIZER;
+
+if (op) {
+ds_put_format(, "inport == %s && ", op->json_key);
+}
+
+ds_put_format(, "arp.op == 1 && arp.tpa == %s", ip_address);
+
+if (extra_match && ds_last(extra_match) != EOF) {
+ds_put_format(, " && %s", ds_cstr(extra_match));
+}
+ds_put_format(,
+  "eth.dst = eth.src; "
+  "eth.src = %s; "
+  "arp.op = 2; /* ARP reply */ "
+  "arp.tha = arp.sha; "
+  "arp.sha = %s; "
+  "arp.tpa = arp.spa; "
+  "arp.spa = %s; "
+  "outport = inport; "
+  "flags.loopback = 1; "
+  "output;",
+  eth_addr,
+  eth_addr,
+  ip_address);
+
+ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
+ds_cstr(), ds_cstr(), hint);
+
+ds_destroy();
+ds_destroy();
+}
+
+/* Builds the logical flow that replies to NS requests for an 'ip_address'
+ * owned by the router. The flow is inserted in table S_ROUTER_IN_IP_INPUT
+ * with the given priority. If 'sn_ip_address' is non-NULL, requests are
+ * restricted only to packets with IP destination 'ip_address' or
+ * 'sn_ip_address'.
+ */
+static void
+build_lrouter_nd_flow(struct ovn_datapath *od, struct ovn_port *op,
+  const char *action, const char *ip_address,
+  const char *sn_ip_address, const char *eth_addr,
+  struct ds *extra_match, uint16_t priority,
+  struct hmap *lflows,
+  const struct ovsdb_idl_row *hint)
+{
+struct ds match = DS_EMPTY_INITIALIZER;
+struct ds actions = DS_EMPTY_INITIALIZER;
+
+if (op) {
+ds_put_format(, "inport == %s && ", op->json_key);
+}
+
+if (sn_ip_address) {
+ds_put_format(, "ip6.dst == {%s, %s} && ",
+  ip_address, sn_ip_address);
+}
+
+ds_put_format(, "nd_ns && nd.target == %s", ip_address);
+
+if (extra_match && ds_last(extra_match) != EOF) {
+ds_put_format(, " && %s", ds_cstr(extra_match));
+}
+
+ds_put_format(,
+  "%s { "
+"eth.src = %s; "
+"ip6.src = %s; "
+"nd.target = %s; "
+"nd.tll = %s; "
+"outport = inport; "
+"flags.loopback = 1; "
+"output; "
+  "};",
+  action,
+  eth_addr,
+  ip_address,
+  ip_address,
+  eth_addr);
+
+ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
+ds_cstr(), ds_cstr(), hint);
+
+ds_destroy();
+ds_destroy();
+}
+
 static void
 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
 struct hmap *lflows, 

[ovs-dev] [PATCH v3 ovn 4/5] ovn-northd: Refactor NAT address parsing.

2020-07-02 Thread Dumitru Ceara
Store NAT entries pointers in ovn_datapath and pre-parse the external IP
addresses. This simplifies the code and makes it easier to reuse the parsed
external IP and solicited-node address without reparsing.

Signed-off-by: Dumitru Ceara 
---
 northd/ovn-northd.c |  115 ++-
 1 file changed, 85 insertions(+), 30 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index e270596..a1ba56f 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -605,6 +605,9 @@ struct ovn_datapath {
  * the "redirect-chassis". */
 struct ovn_port *l3redirect_port;
 
+/* NAT entries configured on the router. */
+struct ovn_nat *nat_entries;
+
 struct ovn_port **localnet_ports;
 size_t n_localnet_ports;
 
@@ -617,6 +620,65 @@ struct ovn_datapath {
 struct hmap nb_pgs;
 };
 
+/* Contains a NAT entry with the external addresses pre-parsed. */
+struct ovn_nat {
+const struct nbrec_nat *nb;
+struct lport_addresses ext_addrs;
+};
+
+/* Returns true if a 'nat_entry' is valid, i.e.:
+ * - parsing was successful.
+ * - the string yielded exactly one IPv4 address or exactly one IPv6 address.
+ */
+static bool nat_entry_is_valid(const struct ovn_nat *nat_entry)
+{
+const struct lport_addresses *ext_addrs = _entry->ext_addrs;
+
+return (ext_addrs->n_ipv4_addrs == 1 && ext_addrs->n_ipv6_addrs == 0) ||
+(ext_addrs->n_ipv4_addrs == 0 && ext_addrs->n_ipv6_addrs == 1);
+}
+
+static bool nat_entry_is_v6(const struct ovn_nat *nat_entry)
+{
+return nat_entry->ext_addrs.n_ipv6_addrs > 0;
+}
+
+static void init_nat_entries(struct ovn_datapath *od)
+{
+if (!od->nbr || od->nbr->n_nat == 0) {
+return;
+}
+
+od->nat_entries = xmalloc(od->nbr->n_nat * sizeof *od->nat_entries);
+
+for (size_t i = 0; i < od->nbr->n_nat; i++) {
+const struct nbrec_nat *nat = od->nbr->nat[i];
+struct ovn_nat *nat_entry = >nat_entries[i];
+
+nat_entry->nb = nat;
+if (!extract_ip_addresses(nat->external_ip,
+  _entry->ext_addrs) ||
+!nat_entry_is_valid(nat_entry)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+
+VLOG_WARN_RL(,
+ "Bad ip address %s in nat configuration "
+ "for router %s", nat->external_ip, od->nbr->name);
+}
+}
+}
+
+static void destroy_nat_entries(struct ovn_datapath *od)
+{
+if (!od->nbr) {
+return;
+}
+
+for (size_t i = 0; i < od->nbr->n_nat; i++) {
+destroy_lport_addresses(>nat_entries[i].ext_addrs);
+}
+}
+
 /* A group of logical router datapaths which are connected - either
  * directly or indirectly.
  * Each logical router can belong to only one group. */
@@ -674,6 +736,8 @@ ovn_datapath_destroy(struct hmap *datapaths, struct 
ovn_datapath *od)
 ovn_destroy_tnlids(>port_tnlids);
 bitmap_free(od->ipam_info.allocated_ipv4s);
 free(od->router_ports);
+destroy_nat_entries(od);
+free(od->nat_entries);
 free(od->localnet_ports);
 ovn_ls_port_group_destroy(>nb_pgs);
 destroy_mcast_info_for_datapath(od);
@@ -1102,6 +1166,7 @@ join_datapaths(struct northd_context *ctx, struct hmap 
*datapaths,
 ovs_list_push_back(nb_only, >list);
 }
 init_mcast_info_for_datapath(od);
+init_nat_entries(od);
 ovs_list_push_back(lr_list, >lr_list);
 }
 }
@@ -8463,30 +8528,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 snat_ips[n_snat_ips++] = snat_ip;
 }
 
-for (int i = 0; i < op->od->nbr->n_nat; i++) {
-const struct nbrec_nat *nat;
-
-nat = op->od->nbr->nat[i];
+for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+struct ovn_nat *nat_entry = >od->nat_entries[i];
+const struct nbrec_nat *nat = nat_entry->nb;
 
-ovs_be32 ip;
-struct in6_addr ipv6;
-bool is_v6 = false;
-if (!ip_parse(nat->external_ip, ) || !ip) {
-if (!ipv6_parse(nat->external_ip, )) {
-static struct vlog_rate_limit rl =
-VLOG_RATE_LIMIT_INIT(5, 1);
-VLOG_WARN_RL(, "bad ip address %s in nat configuration "
- "for router %s", nat->external_ip, op->key);
-continue;
-}
-is_v6 = true;
+/* Skip entries we failed to parse. */
+if (!nat_entry_is_valid(nat_entry)) {
+continue;
 }
 
 if (!strcmp(nat->type, "snat")) {
-if (is_v6) {
+if (nat_entry_is_v6(nat_entry)) {
+struct in6_addr *ipv6 =
+_entry->ext_addrs.ipv6_addrs[0].addr;
+
 snat_ips[n_snat_ips].family = AF_INET6;
-

[ovs-dev] [PATCH v3 ovn 5/5] ovn-northd: Minimize number of ARP/NS responder flows for DNAT.

2020-07-02 Thread Dumitru Ceara
Most ARP/NS responder flows can be configured per datapath instead of per
router port.

The only exception is with distributed gateway router ports which need
special treatment. This patch changes the ARP/NS responder behavior and adds:
- Priority 92 flows to reply to ARP requests on distributed gateway router
  ports, on the chassis where the DNAT entry is bound.
- Priority 91 flows to drop ARP requests on distributed gateway router ports,
  on chassis where the DNAT entry is not bound.
- Priority 90 flows to reply to ARP requests on all other router ports. This
  last type of flows is programmed exactly once per logical router limiting
  the total number of required logical flows.

Suggested-by: Han Zhou 
Reported-by: Girish Moodalbail 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050186.html
Signed-off-by: Dumitru Ceara 
---
 northd/ovn-northd.8.xml |   16 +++-
 northd/ovn-northd.c |  203 ---
 tests/ovn-northd.at |   65 +--
 tests/ovn.at|8 +-
 4 files changed, 190 insertions(+), 102 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 84224ff..11607c0 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1857,9 +1857,8 @@ nd_na_router {
   IPv4: For a configured DNAT IP address or a load balancer
   IPv4 VIP A, for each router port P with
   Ethernet address E, a priority-90 flow matches
-  inport == P  arp.op == 1 
-  arp.tpa == A (ARP request)
-  with the following actions:
+  arp.op == 1  arp.tpa == A
+  (ARP request) with the following actions:
 
 
 
@@ -1876,6 +1875,11 @@ output;
 
 
 
+  IPv4: For a configured load balancer IPv4 VIP, a similar flow is
+  added with the additional match inport == P.
+
+
+
   If the router port P is a distributed gateway router
   port, then the is_chassis_resident(P) is
   also added in the match condition for the load balancer IPv4
@@ -1922,9 +1926,11 @@ nd_na {
 
   
 If the corresponding NAT rule cannot be handled in a
-distributed manner, then this flow is only programmed on
+distributed manner, then a priority-92 flow is programmed on
 the gateway port instance on the
-redirect-chassis.  This behavior avoids
+redirect-chassis.  A priority-91 drop flow is
+programmed on the other chassis when ARP requests/NS packets
+are received on the gateway port. This behavior avoids
 generation of multiple ARP responses from different chassis,
 and allows upstream MAC learning to point to the
 redirect-chassis.
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index a1ba56f..10fc8cf 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -8048,7 +8048,7 @@ lrouter_nat_is_stateless(const struct nbrec_nat *nat)
 static void
 build_lrouter_arp_flow(struct ovn_datapath *od, struct ovn_port *op,
const char *ip_address, const char *eth_addr,
-   struct ds *extra_match, uint16_t priority,
+   struct ds *extra_match, bool drop, uint16_t priority,
struct hmap *lflows, const struct ovsdb_idl_row *hint)
 {
 struct ds match = DS_EMPTY_INITIALIZER;
@@ -8063,20 +8063,24 @@ build_lrouter_arp_flow(struct ovn_datapath *od, struct 
ovn_port *op,
 if (extra_match && ds_last(extra_match) != EOF) {
 ds_put_format(, " && %s", ds_cstr(extra_match));
 }
-ds_put_format(,
-  "eth.dst = eth.src; "
-  "eth.src = %s; "
-  "arp.op = 2; /* ARP reply */ "
-  "arp.tha = arp.sha; "
-  "arp.sha = %s; "
-  "arp.tpa = arp.spa; "
-  "arp.spa = %s; "
-  "outport = inport; "
-  "flags.loopback = 1; "
-  "output;",
-  eth_addr,
-  eth_addr,
-  ip_address);
+if (drop) {
+ds_put_format(, "drop;");
+} else {
+ds_put_format(,
+  "eth.dst = eth.src; "
+  "eth.src = %s; "
+  "arp.op = 2; /* ARP reply */ "
+  "arp.tha = arp.sha; "
+  "arp.sha = %s; "
+  "arp.tpa = arp.spa; "
+  "arp.spa = %s; "
+  "outport = inport; "
+  "flags.loopback = 1; "
+  "output;",
+  eth_addr,
+  eth_addr,
+  ip_address);
+}
 
 ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT, priority,
 ds_cstr(), ds_cstr(), hint);
@@ -8095,7 +8099,7 @@ static 

[ovs-dev] [PATCH v3 ovn 1/5] ovn-northd: Document OVS register usage in logical flows.

2020-07-02 Thread Dumitru Ceara
Also, use macros instead of bare references to register names.

Acked-by: Han Zhou 
Signed-off-by: Dumitru Ceara 
---
 northd/ovn-northd.c |  158 ---
 1 file changed, 110 insertions(+), 48 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 417dbb6..85d73ff 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -227,8 +227,55 @@ enum ovn_stage {
 #define REG_ECMP_GROUP_ID   "reg8[0..15]"
 #define REG_ECMP_MEMBER_ID  "reg8[16..31]"
 
+/* Registers used for routing. */
+#define REG_NEXT_HOP_IPV4 "reg0"
+#define REG_NEXT_HOP_IPV6 "xxreg0"
+#define REG_SRC_IPV4 "reg1"
+#define REG_SRC_IPV6 "xxreg1"
+
 #define FLAGBIT_NOT_VXLAN "flags[1] == 0"
 
+/*
+ * OVS register usage:
+ *
+ * Logical Switch pipeline:
+ * +-+-+
+ * | R0  | REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN} |
+ * +-+-+
+ * | R1 - R9 |  UNUSED |
+ * +-+-+
+ *
+ * Logical Router pipeline:
+ * +-+--+---+-+
+ * | R0  | REGBIT_ND_RA_OPTS_RESULT |   | |
+ * | |IPv4-NEXT-HOP | X | |
+ * +-+--+ X | |
+ * | R1  | IPv4-SRC-IP for ARP-REQ  | R |IPv6 |
+ * +-+--+ E |  NEXT-HOP   |
+ * | R2  |UNUSED| G | |
+ * +-+--+ 0 | |
+ * | R3  |UNUSED|   | |
+ * +-+--+---+-+
+ * | R4  |UNUSED|   | |
+ * +-+--+ X | |
+ * | R5  |UNUSED| X | IPv6-SRC-IP |
+ * +-+--+ R |   for NS|
+ * | R6  |UNUSED| E | |
+ * +-+--+ G | |
+ * | R7  |UNUSED| 1 | |
+ * +-+--+---+-+
+ * | R8  | ECMP_GROUP_ID|
+ * | | ECMP_MEMBER_ID   |
+ * +-+--+
+ * | | REGBIT_{ |
+ * | |   EGRESS_LOOPBACK/   |
+ * | R9  |   PKT_LARGER/|
+ * | |   LOOKUP_NEIGHBOR_RESULT/|
+ * | |   SKIP_LOOKUP_NEIGHBOR}  |
+ * +-+--+
+ *
+ */
+
 /* Returns an "enum ovn_stage" built from the arguments. */
 static enum ovn_stage
 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
@@ -7139,15 +7186,15 @@ build_routing_policy_flow(struct hmap *lflows, struct 
ovn_datapath *od,
 ds_put_format(, "pkt.mark = %u; ", pkt_mark);
 }
 bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false;
-ds_put_format(, "%sreg0 = %s; "
-  "%sreg1 = %s; "
+ds_put_format(, "%s = %s; "
+  "%s = %s; "
   "eth.src = %s; "
   "outport = %s; "
   "flags.loopback = 1; "
   "next;",
-  is_ipv4 ? "" : "xx",
+  is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
   rule->nexthop,
-  is_ipv4 ? "" : "xx",
+  is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
   lrp_addr_s,
   out_port->lrp_networks.ea_s,
   out_port->json_key);
@@ -7537,14 +7584,14 @@ build_ecmp_route_flow(struct hmap *lflows, struct 
ovn_datapath *od,
   REG_ECMP_MEMBER_ID" == %"PRIu16,
   eg->id, er->id);
 ds_clear();
-ds_put_format(, "%sreg0 = %s; "
-  "%sreg1 = %s; "
+ds_put_format(, "%s = %s; "
+  "%s = %s; "
   "eth.src = %s; "
   "outport = %s; "
   "next;",
-  is_ipv4 ? "" : "xx",
+  is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
   route->nexthop,
-  is_ipv4 ? "" : "xx",
+  is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6,
   lrp_addr_s,
   out_port->lrp_networks.ea_s,
   out_port->json_key);
@@ -7579,8 +7626,8 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
   , );
 
 struct ds actions = DS_EMPTY_INITIALIZER;
-ds_put_format(, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %sreg0 = ",
-  is_ipv4 ? "" : "xx");
+ds_put_format(, "ip.ttl--; "REG_ECMP_GROUP_ID" = 0; %s = ",
+  is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6);
 
 if (gateway) {
 ds_put_cstr(, gateway);
@@ -7588,12 +7635,12 @@ add_route(struct hmap *lflows, const struct ovn_port 
*op,
 

[ovs-dev] [PATCH v3 ovn 2/5] ovn-northd: Store ETH address of router inport in xreg0.

2020-07-02 Thread Dumitru Ceara
This helps simplifying logical flows that need to use the port's
configured ETH address:
- ARP responders for owned IPs
- NS responders for owned IPs

Signed-off-by: Dumitru Ceara 
---
 northd/ovn-northd.8.xml |   22 ---
 northd/ovn-northd.c |  148 ++-
 tests/ovn-northd.at |  140 
 3 files changed, 233 insertions(+), 77 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index a7639f3..78e2a71 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1487,7 +1487,9 @@ output;
   For each enabled router port P with Ethernet address
   E, a priority-50 flow that matches inport ==
   P  (eth.mcast || eth.dst ==
-  E), with action next;.
+  E), stores the router port ethernet address
+  and advances to next table, with action
+  xreg0[0..47]=E; next;.
 
 
 
@@ -1507,7 +1509,7 @@ output;
   a priority-50 flow that matches inport == GW
eth.dst == E, where GW
   is the logical router gateway port, with action
-  next;.
+  xreg0[0..47]=E; next;.
 
 
 
@@ -1770,10 +1772,10 @@ next;
 
 
 eth.dst = eth.src;
-eth.src = E;
+eth.src = xreg0[0..47];
 arp.op = 2; /* ARP reply. */
 arp.tha = arp.sha;
-arp.sha = E;
+arp.sha = xreg0[0..47];
 arp.tpa = arp.spa;
 arp.spa = A;
 outport = P;
@@ -1822,10 +1824,10 @@ output;
 
 
 nd_na_router {
-eth.src = E;
+eth.src = xreg0[0..47];
 ip6.src = A;
 nd.target = A;
-nd.tll = E;
+nd.tll = xreg0[0..47];
 outport = inport;
 flags.loopback = 1;
 output;
@@ -1862,10 +1864,10 @@ nd_na_router {
 
 
 eth.dst = eth.src;
-eth.src = E;
+eth.src = xreg0[0..47];
 arp.op = 2; /* ARP reply. */
 arp.tha = arp.sha;
-arp.sha = E;
+arp.sha = xreg0[0..47];
 arp.tpa = arp.spa;
 arp.spa = A;
 outport = P;
@@ -1894,8 +1896,8 @@ output;
 
 eth.dst = eth.src;
 nd_na {
-eth.src = E;
-nd.tll = E;
+eth.src = xreg0[0..47];
+nd.tll = xreg0[0..47];
 ip6.src = A;
 nd.target = A;
 outport = P;
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 85d73ff..7c92436 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -223,6 +223,11 @@ enum ovn_stage {
 #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]"
 #define REGBIT_SKIP_LOOKUP_NEIGHBOR "reg9[3]"
 
+/* Register to store the eth address associated to a router port for packets
+ * received in S_ROUTER_IN_ADMISSION.
+ */
+#define REG_INPORT_ETH_ADDR "xreg0[0..47]"
+
 /* Register for ECMP bucket selection. */
 #define REG_ECMP_GROUP_ID   "reg8[0..15]"
 #define REG_ECMP_MEMBER_ID  "reg8[16..31]"
@@ -246,33 +251,40 @@ enum ovn_stage {
  * +-+-+
  *
  * Logical Router pipeline:
- * +-+--+---+-+
- * | R0  | REGBIT_ND_RA_OPTS_RESULT |   | |
- * | |IPv4-NEXT-HOP | X | |
- * +-+--+ X | |
- * | R1  | IPv4-SRC-IP for ARP-REQ  | R |IPv6 |
- * +-+--+ E |  NEXT-HOP   |
- * | R2  |UNUSED| G | |
- * +-+--+ 0 | |
- * | R3  |UNUSED|   | |
- * +-+--+---+-+
- * | R4  |UNUSED|   | |
- * +-+--+ X | |
- * | R5  |UNUSED| X | IPv6-SRC-IP |
- * +-+--+ R |   for NS|
- * | R6  |UNUSED| E | |
- * +-+--+ G | |
- * | R7  |UNUSED| 1 | |
- * +-+--+---+-+
- * | R8  | ECMP_GROUP_ID|
- * | | ECMP_MEMBER_ID   |
- * +-+--+
- * | | REGBIT_{ |
- * | |   EGRESS_LOOPBACK/   |
- * | R9  |   PKT_LARGER/|
- * | |   LOOKUP_NEIGHBOR_RESULT/|
- * | |   SKIP_LOOKUP_NEIGHBOR}  |
- * +-+--+
+ * +-+--+---+-+---+-+
+ * | R0  | REGBIT_ND_RA_OPTS_RESULT | X | |   | |
+ * | |IPv4-NEXT-HOP | R | |   | |
+ * +-+--+ E | INPORT_ETH_ADDR | X | |
+ * | R1  | IPv4-SRC-IP for ARP-REQ  | G |   (< IP_INPUT)  | X |IPv6 |
+ * | |  | 0 | | R |  NEXT-HOP   |
+ * +-+--+---+-+ E |(>= IP_INPUT)|
+ * | R2  |UNUSED| X | | G | |
+ * | |  | R | | 0 | |
+ * 

[ovs-dev] [PATCH v3 ovn 0/5] Reduce number of flows in IN_IP_INPUT table for DNAT.

2020-07-02 Thread Dumitru Ceara
Patch 1 documents and refactors the usage of OVS registers in logical
flows.

Patches 2-4 refactor the ARP/NS responder code for logical routers in
order to make it easier for patch 5 to configure the flows with different
priorities depending on logical port type.

Suggested-by: Han Zhou 
Reported-by: Girish Moodalbail 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-discuss/2020-June/050186.html
Signed-off-by: Dumitru Ceara 

Dumitru Ceara (5):
  ovn-northd: Document OVS register usage in logical flows.
  ovn-northd: Store ETH address of router inport in xreg0.
  ovn-northd: Refactor ARP/NS responder in router pipeline.
  ovn-northd: Refactor NAT address parsing.
  ovn-northd: Minimize number of ARP/NS responder flows for DNAT.


 northd/ovn-northd.8.xml |   46 ++-
 northd/ovn-northd.c |  730 +--
 tests/ovn-northd.at |  149 ++
 tests/ovn.at|8 -
 4 files changed, 631 insertions(+), 302 deletions(-)


---
v3:
- Addressed Han's comment:
  - fixed register diagram in patch 1 and 2.
  - added Han's ack on patch 1.
v2:
- Addressed Numan's comments:
  - Inserted a new patch in the beginning of the series to document
OVS register usage in logical flows. Also refactored the code
to avoid using bare register names.
  - Added unit tests to ovn-northd.at in every patch that changed
logical flows.
  - Added/updated documentation in ovn-northd.8.xml in every patch that
changed logical flows.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8] ovsdb-idl: Force IDL retry when missing updates encountered.

2020-07-02 Thread Dumitru Ceara
Adds a generic recovery mechanism which triggers an IDL retry with fast
resync disabled in case the IDL has detected that it ended up in an
inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl
implementation.

Additionally, this commit also:
- bumps IDL semantic error logs to level ERR to make them more
  visible.
- triggers an IDL retry in cases when the IDL client used to try to
  recover (i.e., trying to add an existing row, trying to remove a non
  existent row).

CC: Andy Zhou 
CC: Han Zhou 
CC: Ilya Maximets 
Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.")
Acked-by: Han Zhou 
Signed-off-by: Dumitru Ceara 

---
V8:
- Address Han's comments:
  - Reword commit log.
- Add Han's ack.
V7:
- Address Ilya's comments:
  - improve result returning in ovsdb_idl_process_update2
- For consistency use the same way of returning results in
  ovsdb_idl_process_update and make error handling generic.
- Bump semantic log level to ERR to make them more visible and force IDL
  retry on every semantic error.
---
 lib/ovsdb-idl.c | 169 
 1 file changed, 109 insertions(+), 60 deletions(-)

diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 0a18261..ef3b97b 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -321,14 +321,19 @@ static bool ovsdb_idl_handle_monitor_canceled(struct 
ovsdb_idl *,
 static void ovsdb_idl_db_parse_update(struct ovsdb_idl_db *,
   const struct json *table_updates,
   enum ovsdb_idl_monitor_method method);
-static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
- const struct uuid *,
- const struct json *old,
- const struct json *new);
-static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
-  const struct uuid *,
-  const char *operation,
-  const struct json *row);
+enum update_result {
+OVSDB_IDL_UPDATE_DB_CHANGED,
+OVSDB_IDL_UPDATE_NO_CHANGES,
+OVSDB_IDL_UPDATE_INCONSISTENT,
+};
+static enum update_result ovsdb_idl_process_update(struct ovsdb_idl_table *,
+   const struct uuid *,
+   const struct json *old,
+   const struct json *new);
+static enum update_result ovsdb_idl_process_update2(struct ovsdb_idl_table *,
+const struct uuid *,
+const char *operation,
+const struct json *row);
 static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
 static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
 static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
@@ -2418,6 +2423,7 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
   version_suffix, table->class_->name);
 }
 SHASH_FOR_EACH (table_node, json_object(table_update)) {
+enum update_result result = OVSDB_IDL_UPDATE_NO_CHANGES;
 const struct json *row_update = table_node->data;
 struct uuid uuid;
 
@@ -2450,13 +2456,13 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
 operation = ops[i];
 row = shash_find_data(json_object(row_update), operation);
 
-if (row)  {
-if (ovsdb_idl_process_update2(table, , operation,
-  row)) {
-db->change_seqno++;
-}
-break;
+if (!row) {
+continue;
 }
+
+result = ovsdb_idl_process_update2(table, ,
+   operation, row);
+break;
 }
 
 /* row_update2 should contain one of the objects */
@@ -2487,10 +2493,24 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
   "and \"new\" members");
 }
 
-if (ovsdb_idl_process_update(table, , old_json,
- new_json)) {
-db->change_seqno++;
-}
+result = ovsdb_idl_process_update(table, , old_json,
+  new_json);
+}
+
+switch (result) {
+case OVSDB_IDL_UPDATE_DB_CHANGED:
+db->change_seqno++;
+break;
+case OVSDB_IDL_UPDATE_NO_CHANGES:
+break;
+  

Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Richardson, Bruce



> -Original Message-
> From: Pai G, Sunil 
> Sent: Thursday, July 2, 2020 2:14 PM
> To: d...@openvswitch.org
> Cc: Stokes, Ian ; i.maxim...@ovn.org;
> david.march...@redhat.com; Richardson, Bruce ;
> Pai G, Sunil ; Tummala, Sivaprasad
> 
> Subject: [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.
> 
> Make based build is deprecated in DPDK. Meson based build to be used for
> future DPDK releases.
> 
> This updates travis, configure script and documentation for using DPDK
> Meson with OVS.
> 
> Signed-off-by: Sunil Pai G 

Thanks for this, a couple of comments from the DPDK build side inline below.

Regards,
/Bruce

> ---
>  .travis.yml   |  3 ++
>  .travis/linux-build.sh| 37 +---
>  .travis/linux-prepare.sh  |  1 +
>  Documentation/intro/install/afxdp.rst |  2 +-
> Documentation/intro/install/dpdk.rst  | 56 
>  Makefile.am   |  3 +-
>  acinclude.m4  | 42 --
>  parse_pkg_cfg.py  | 62 +++
>  8 files changed, 167 insertions(+), 39 deletions(-)  create mode 100644
> parse_pkg_cfg.py
> 
> diff --git a/.travis.yml b/.travis.yml
> index 97249c1ce..46d7ad9bb 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -27,6 +27,9 @@ addons:
>- selinux-policy-dev
>- libunbound-dev
>- libunwind-dev
> +  - python3-setuptools
> +  - python3-wheel
> +  - ninja-build
> 
>  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> 
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> 33b359a61..7fa7e738c 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -85,17 +85,21 @@ function install_dpdk()  {
>  local DPDK_VER=$1
>  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> +local DPDK_OPTS=""
> 
> -if [ -z "$TRAVIS_ARCH" ] ||
> -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> -TARGET="x86_64-native-linuxapp-gcc"
> -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> -TARGET="arm64-armv8a-linuxapp-gcc"
> -else
> +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> +DPDK_OPTS="$DPDK_OPTS --cross-file
> config/arm/arm64_armv8_linux_gcc"
> +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ]; then
>  echo "Target is unknown"
>  exit 1
>  fi
> 
> +if [ "$DPDK_SHARED" ]; then
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> +else
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> +fi
> +
>  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
>  # Avoid using cache for git tree build.
>  rm -rf dpdk-dir
> @@ -108,7 +112,8 @@ function install_dpdk()
>  if [ -f "${VERSION_FILE}" ]; then
>  VER=$(cat ${VERSION_FILE})
>  if [ "${VER}" = "${DPDK_VER}" ]; then
> -EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-
> dir/build"
> +sudo ninja -C $(pwd)/dpdk-dir/build install
> +sudo ldconfig
>  echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir"
>  return
>  fi
> @@ -122,16 +127,18 @@ function install_dpdk()
>  pushd dpdk-dir
>  fi
> 
> -make config CC=gcc T=$TARGET
> +# Disable building DPDK kernel modules. Not needed for OVS build or
> tests.
> +DPDK_OPTS="$DPDK_OPTS -Denable_kmods=false"
> 

NOTE: enable_kmods=false is the default in DPDK, so should not need to be
specified, unless you want to be doubly sure they are not build, or for
clarity.

> -if [ "$DPDK_SHARED" ]; then
> -sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config
> -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib
> -fi
> +DPDK_OPTS="$DPDK_OPTS -Dc_args=-fPIC"

Since meson build for DPDK always links the objects into both static and
shared libs, all objects are already built with -fPIC, so this should not
be necessary.

> +CC=gcc meson $DPDK_OPTS build
> +ninja -C build
> +sudo ninja -C build install
> +
> +# Update the library paths.
> +sudo ldconfig
> 
> -make -j4 CC=gcc EXTRA_CFLAGS='-fPIC'
> -EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build"
> -echo "Installed DPDK source in $(pwd)"
> +echo "Installed DPDK source"
>  popd
>  echo "${DPDK_VER}" > ${VERSION_FILE}  } diff --git a/.travis/linux-
> prepare.sh b/.travis/linux-prepare.sh index 8cbbd5623..682f6234b 100755
> --- a/.travis/linux-prepare.sh
> +++ b/.travis/linux-prepare.sh
> @@ -16,6 +16,7 @@ cd ..
> 
>  pip3 install --disable-pip-version-check --user flake8 hacking
>  pip3 install --user --upgrade docutils
> +pip3 install --user  'meson==0.47.1'
> 

Is there a reason for forcing this to the oldest supported  version?

>  if [ "$M32" ]; then
>  # Installing 32-bit libraries.

___
dev mailing list
d...@openvswitch.org

Re: [ovs-dev] [PATCH] dpdk: Refuse running on cpu >= RTE_MAX_LCORE.

2020-07-02 Thread David Marchand
On Thu, Jul 2, 2020 at 3:09 PM Ilya Maximets  wrote:
>
> On 6/26/20 2:27 PM, David Marchand wrote:
> > DPDK lcores range is [0, RTE_MAX_LCORE[.
> > Trying to use a lcore out of this range expose OVS to crashes in DPDK 
> > mempool
> > local cache array.
> > Prefer a "clean" abort so that users know that something is wrong with
> > their configuration.
> >
> > Signed-off-by: David Marchand 
> > ---
> >  lib/dpdk.c | 7 +++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/lib/dpdk.c b/lib/dpdk.c
> > index 98d0e2643e..55ce9a9221 100644
> > --- a/lib/dpdk.c
> > +++ b/lib/dpdk.c
> > @@ -617,7 +617,14 @@ dpdk_set_lcore_id(unsigned cpu)
> >  {
> >  /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
> >  ovs_assert(cpu != NON_PMD_CORE_ID);
> > +if (cpu >= RTE_MAX_LCORE) {
> > +cpu = LCORE_ID_ANY;
> > +}
> >  RTE_PER_LCORE(_lcore_id) = cpu;
> > +if (rte_lcore_id() == LCORE_ID_ANY) {
> > +ovs_abort(0, "PMD thread init failed, trying to use more cores 
> > than "
> > +  "DPDK supports (RTE_MAX_LCORE %u).", RTE_MAX_LCORE);
>
> I don't think that random OVS abort after pmd-cpu-mask change or port addition
> is a good thing to have.  Maybe it's better to limit CPU core discovery?
> This way OVS will never try to create thread on unwanted cores.
>
> Since ovs-numa module is mostly used by userspace datapath this will not
> affect non-DPDK setups.
>
> Something like this (not tested):
>
> diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
> index 6d0a68522..7c8a3b036 100644
> --- a/lib/ovs-numa.c
> +++ b/lib/ovs-numa.c
> @@ -27,6 +27,7 @@
>  #include 
>  #endif /* __linux__ */
>
> +#include "dpdk.h"
>  #include "hash.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/list.h"
> @@ -58,6 +59,13 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
>
>  #define MAX_NUMA_NODES 128
>
> +#ifdef DPDK_NETDEV
> +/* DPDK will not work correctly for cores with core_id >= RTE_MAX_LCORE. */
> +#define MAX_CORE_ID(RTE_MAX_LCORE - 1)
> +#else
> +#define MAX_CORE_IDINT_MAX
> +#endif
> +
>  /* numa node. */
>  struct numa_node {
>  struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
> @@ -112,6 +120,12 @@ insert_new_numa_node(int numa_id)
>  static struct cpu_core *
>  insert_new_cpu_core(struct numa_node *n, unsigned core_id)
>  {
> +if (core_id > MAX_CORE_ID) {
> +VLOG_DBG("Ignoring CPU core with id %u ( %u > %d).",
> + core_id, core_id, MAX_CORE_ID);
> +return NULL;
> +}
> +
>  struct cpu_core *c = xzalloc(sizeof *c);
>
>  hmap_insert(_cpu_cores, >hmap_node, hash_int(core_id, 0));
> @@ -278,6 +292,11 @@ ovs_numa_init(void)
>  if (ovsthread_once_start()) {
>  const struct numa_node *n;
>
> +if (MAX_CORE_ID != INT_MAX) {
> +VLOG_INFO("Maximum allowed CPU core id is %d. "
> +  "Other cores will not be available.", MAX_CORE_ID);
> +}
> +
>  if (dummy_numa) {
>  discover_numa_and_core_dummy();
>  } else {
> ---
>
> What do you think?

pmd threads will never run on any core >= RTE_MAX_LCORE ok, it works.

Though, it binds OVS core id and DPDK lcore id.
Should OVS really care about this?


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Ilya Maximets
On 7/2/20 3:13 PM, Sunil Pai G wrote:
> Make based build is deprecated in DPDK. Meson based
> build to be used for future DPDK releases.
> 
> This updates travis, configure script and documentation
> for using DPDK Meson with OVS.
> 
> Signed-off-by: Sunil Pai G 

Thanks for working on this!
Not a full review, just a few quick bits.

At first, why dpdk-latest?  Is there issue with meson build on 19.11?

Few more comments inline.

Best regards, Ilya Maximets.

> ---
>  .travis.yml   |  3 ++
>  .travis/linux-build.sh| 37 +---
>  .travis/linux-prepare.sh  |  1 +
>  Documentation/intro/install/afxdp.rst |  2 +-
>  Documentation/intro/install/dpdk.rst  | 56 
>  Makefile.am   |  3 +-
>  acinclude.m4  | 42 --
>  parse_pkg_cfg.py  | 62 +++
>  8 files changed, 167 insertions(+), 39 deletions(-)
>  create mode 100644 parse_pkg_cfg.py
> 
> diff --git a/.travis.yml b/.travis.yml
> index 97249c1ce..46d7ad9bb 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -27,6 +27,9 @@ addons:
>- selinux-policy-dev
>- libunbound-dev
>- libunwind-dev
> +  - python3-setuptools
> +  - python3-wheel
> +  - ninja-build
>  
>  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
>  
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> index 33b359a61..7fa7e738c 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -85,17 +85,21 @@ function install_dpdk()
>  {
>  local DPDK_VER=$1
>  local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
> +local DPDK_OPTS=""
>  
> -if [ -z "$TRAVIS_ARCH" ] ||
> -   [ "$TRAVIS_ARCH" == "amd64" ]; then
> -TARGET="x86_64-native-linuxapp-gcc"
> -elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
> -TARGET="arm64-armv8a-linuxapp-gcc"
> -else
> +if [ "$TRAVIS_ARCH" == "aarch64" ]; then
> +DPDK_OPTS="$DPDK_OPTS --cross-file config/arm/arm64_armv8_linux_gcc"

We're not cross compiling, we're actually building on aarch64 here.
This option should not be needed.  IIUC, meson should detect current
architecture and build with appropriate configuration.

We should be able to just drop all the TRAVIS_ARCH checks for DPDK here.

> +elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ]; then
>  echo "Target is unknown"
>  exit 1
>  fi
>  
> +if [ "$DPDK_SHARED" ]; then
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
> +else
> +EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
> +fi

I gusee we could just change the env variable and not parse it
here. i.e. have DPDK_LIB=static defined by travis.yml by default
and redefine it for matrix entries where we need shared libs.

> +
>  if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
>  # Avoid using cache for git tree build.
>  rm -rf dpdk-dir
> @@ -108,7 +112,8 @@ function install_dpdk()
>  if [ -f "${VERSION_FILE}" ]; then
>  VER=$(cat ${VERSION_FILE})
>  if [ "${VER}" = "${DPDK_VER}" ]; then
> -EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-dir/build"
> +sudo ninja -C $(pwd)/dpdk-dir/build install
> +sudo ldconfig

I think that installing right here inside the cached folder and just
adjusting environment variables should be a bit faster than re-installing
DPDK every time.

This script also will be a good example for people like me, who really
don't want to install development versions of DPDK globally on a work
laptop while testing OVS builds.

>  echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir"
>  return
>  fi
> @@ -122,16 +127,18 @@ function install_dpdk()
>  pushd dpdk-dir
>  fi
>  
> -make config CC=gcc T=$TARGET
> +# Disable building DPDK kernel modules. Not needed for OVS build or 
> tests.
> +DPDK_OPTS="$DPDK_OPTS -Denable_kmods=false"

We should also disable examples and tests at least.

>  
> -if [ "$DPDK_SHARED" ]; then
> -sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config
> -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib
> -fi
> +DPDK_OPTS="$DPDK_OPTS -Dc_args=-fPIC"
> +CC=gcc meson $DPDK_OPTS build
> +ninja -C build
> +sudo ninja -C build install
> +
> +# Update the library paths.
> +sudo ldconfig
>  
> -make -j4 CC=gcc EXTRA_CFLAGS='-fPIC'
> -EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build"
> -echo "Installed DPDK source in $(pwd)"
> +echo "Installed DPDK source"
>  popd
>  echo "${DPDK_VER}" > ${VERSION_FILE}
>  }
> diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh
> index 8cbbd5623..682f6234b 100755
> --- a/.travis/linux-prepare.sh
> +++ b/.travis/linux-prepare.sh
> @@ -16,6 +16,7 @@ cd ..
>  
>  pip3 install 

Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP (iperf3)

2020-07-02 Thread Shahaji Bhosle via dev
Thanks Yanqin,
I am not seeing any context switches beyond 40usec in our do nothing loop
test. But when OvS packets multiple rings(queues) on the same CPU and the
number of packet it starts batching (MAX_BURST_SIZE) the toops will will
take more time, I can see rings getting getting filled up. And then its a
feedback loop. CPUs are running close to 100% any disturbance at that point
I think is too much.
Do you have any data that you use to monitor OvS. I am doing all the above
experiments without OvS.
Thanks, Shahaji

On Thu, Jul 2, 2020 at 4:43 AM Yanqin Wei  wrote:

> Hi Shahaji,
>
> IIUC, 1Hz time tick cannot be disabled even if full dynticks, right? But I
> have no idea of why it caused packet loss because it should be only a small
> overhead when rcu_nocbs is enabled .
>
> Best Regards,
> Wei Yanqin
>
> ===
>
> From: Shahaji Bhosle 
> Sent: Thursday, July 2, 2020 6:11 AM
> To: Yanqin Wei 
> Cc: Flavio Leitner ; ovs-dev@openvswitch.org; nd <
> n...@arm.com>; Ilya Maximets ; Lee Reed <
> lee.r...@broadcom.com>; Vinay Gupta ; Alex
> Barba 
> Subject: Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP
> (iperf3)
>
> Hi Yanqin,
> I added the patch you gave me to my script which runs a do nothing for
> loop. You can see the spikes in the below plot. 976/1000 times we are
> perfect, but around every 1 second u can see something going wrong. I dont
> see anything wrong in the trace-cmd world.
> Thanks, Shahaji
>
> root@bcm958802a8046c:~/vinay_rx/dynticks-testing# ./run_isb_rdtsc
> + TARGET=2
> + MASK=4
> + NUM_ITER=1000
> + NUM_MS=100
> + N=3750
> + LOGFILE=loop_1000iter_100ms.log
> + tee loop_1000iter_100ms.log
> + trace-cmd record -p function_graph -e all -M 4 -o
> trace_1000iter_100ms.dat taskset -c 2
> /home/root/arm_stb_user_loop_isb_rdtsc 1000 3750
>   plugin 'function_graph'
> Cycles/Second (Hz) = 30
> Nano-seconds per cycle = 0.
>
> Using ISB() before rte_rdtsc()
> num_iter: 1000
> do_nothing_loop for (N)=3750
> Running 1000 iterations of do_nothing_loop for (N)=3750
>
> Average =  100282.193430333 u-secs
> Max =  124777.48867 u-secs
> Min =  10.01767 u-secs
> \u03c3  =1931.352376508 u-secs
>
> Average =  300846580.29 cycles
> Max =  374332466.00 cycles
> Min =  30053.00 cycles
> \u03c3  =5794057.13 cycles
>
> #\u03c3 = events
>  0 = 976
>  1 = 3
>  2 = 4
>  3 = 3
>  4 = 3
>  5 = 2
>  6 = 2
>  7 = 2
>  8 = 1
>  9 = 1
> 10 = 1
> 12 = 2
>
>
>
>
> On Wed, Jul 1, 2020 at 3:57 AM Yanqin Wei 
> wrote:
> Hi Shahaji,
>
> Adding isb instruction can help rdtsc precise, which sync system counter
> to cntvct_el0. There is a patch in DPDK.
> https://patchwork.dpdk.org/patch/66561/
> So it may be not related with intermittent drops you observed.
>
> Best Regards,
> Wei Yanqin
>
> > -Original Message-
> > From: dev  On Behalf Of Shahaji
> Bhosle
> > via dev
> > Sent: Wednesday, July 1, 2020 6:05 AM
> > To: Flavio Leitner 
> > Cc: mailto:ovs-dev@openvswitch.org; Ilya Maximets  i.maxim...@samsung.com>;
> > Lee Reed ; Vinay Gupta
> > ; Alex Barba  alex.ba...@broadcom.com>
> > Subject: Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP
> (iperf3)
> >
> > Hi Flavio,
> > I still see intermittent drops with rcu_nocbs. So I wrote that
> do_nothing()
> > loop..to avoid all the other distractions to see if Linux is messing
> with the OVS
> > loop just to see what is going on. The interesting thing I see the case
> *BOLD*
> > below where I use an ISB() instruction my STD deviation is well within
> Both the
> > results are basically DO NOTHING FOR 100msec and see what happens to
> > time :) Thanks, Shahaji
> >
> > static inline uint64_t
> > *rte_get_tsc_cycles*(void)
> > {
> > uint64_t tsc;
> > #ifdef USE_ISB
> > asm volatile("*isb*; mrs %0, pmccntr_el0" : "=r"(tsc)); #else asm
> > volatile("mrs %0, pmccntr_el0" : "=r"(tsc)); #endif return tsc; } #endif
> > /*RTE_ARM_EAL_RDTSC_USE_PMU*/
> >
> > ==
> > usleep(100);
> > for (volatile int i=0; i > rte_get_tsc_cycles();
> > /* do nothig for 1us second */
> > *#ifdef USE_ISB*
> > for(volatile int j=0; j < num_us; j++);   * THIS IS
> MESSED
> > UP, 100msec do nothing, I am getting 2033 usec STD DEVIATION* #else
> > *for(volatile int j=0; j < num_us; j++);    THIS LOOP HAS
> > VERY LOW STD DEVIATION*
> > * rte_isb();*
> > #endif
> > volatile uint64_t tsc_end = rte_get_tsc_cycles(); cycles[i] = tsc_end -
> tsc_start; }
> > usleep(100); calc_avg_var_stddev(num_iter, [0]);
> > ===
> > *#ifdef USE_ISB*
> > root@bcm958802a8046c:~/vinay_rx/dynticks-testing# ./run_isb_rdtsc
> > + TARGET=2
> > + MASK=4
> > + NUM_ITER=1000
> > + NUM_MS=100
> > + N=3750
> > 

[ovs-dev] [PATCH RFC dpdk-latest] build: Add support for DPDK meson build.

2020-07-02 Thread Sunil Pai G
Make based build is deprecated in DPDK. Meson based
build to be used for future DPDK releases.

This updates travis, configure script and documentation
for using DPDK Meson with OVS.

Signed-off-by: Sunil Pai G 
---
 .travis.yml   |  3 ++
 .travis/linux-build.sh| 37 +---
 .travis/linux-prepare.sh  |  1 +
 Documentation/intro/install/afxdp.rst |  2 +-
 Documentation/intro/install/dpdk.rst  | 56 
 Makefile.am   |  3 +-
 acinclude.m4  | 42 --
 parse_pkg_cfg.py  | 62 +++
 8 files changed, 167 insertions(+), 39 deletions(-)
 create mode 100644 parse_pkg_cfg.py

diff --git a/.travis.yml b/.travis.yml
index 97249c1ce..46d7ad9bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,6 +27,9 @@ addons:
   - selinux-policy-dev
   - libunbound-dev
   - libunwind-dev
+  - python3-setuptools
+  - python3-wheel
+  - ninja-build
 
 before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
 
diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
index 33b359a61..7fa7e738c 100755
--- a/.travis/linux-build.sh
+++ b/.travis/linux-build.sh
@@ -85,17 +85,21 @@ function install_dpdk()
 {
 local DPDK_VER=$1
 local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
+local DPDK_OPTS=""
 
-if [ -z "$TRAVIS_ARCH" ] ||
-   [ "$TRAVIS_ARCH" == "amd64" ]; then
-TARGET="x86_64-native-linuxapp-gcc"
-elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
-TARGET="arm64-armv8a-linuxapp-gcc"
-else
+if [ "$TRAVIS_ARCH" == "aarch64" ]; then
+DPDK_OPTS="$DPDK_OPTS --cross-file config/arm/arm64_armv8_linux_gcc"
+elif [ "$TRAVIS_ARCH" != "amd64" ] && [ -n "$TRAVIS_ARCH" ]; then
 echo "Target is unknown"
 exit 1
 fi
 
+if [ "$DPDK_SHARED" ]; then
+EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
+else
+EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
+fi
+
 if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
 # Avoid using cache for git tree build.
 rm -rf dpdk-dir
@@ -108,7 +112,8 @@ function install_dpdk()
 if [ -f "${VERSION_FILE}" ]; then
 VER=$(cat ${VERSION_FILE})
 if [ "${VER}" = "${DPDK_VER}" ]; then
-EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-dir/build"
+sudo ninja -C $(pwd)/dpdk-dir/build install
+sudo ldconfig
 echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir"
 return
 fi
@@ -122,16 +127,18 @@ function install_dpdk()
 pushd dpdk-dir
 fi
 
-make config CC=gcc T=$TARGET
+# Disable building DPDK kernel modules. Not needed for OVS build or tests.
+DPDK_OPTS="$DPDK_OPTS -Denable_kmods=false"
 
-if [ "$DPDK_SHARED" ]; then
-sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib
-fi
+DPDK_OPTS="$DPDK_OPTS -Dc_args=-fPIC"
+CC=gcc meson $DPDK_OPTS build
+ninja -C build
+sudo ninja -C build install
+
+# Update the library paths.
+sudo ldconfig
 
-make -j4 CC=gcc EXTRA_CFLAGS='-fPIC'
-EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build"
-echo "Installed DPDK source in $(pwd)"
+echo "Installed DPDK source"
 popd
 echo "${DPDK_VER}" > ${VERSION_FILE}
 }
diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh
index 8cbbd5623..682f6234b 100755
--- a/.travis/linux-prepare.sh
+++ b/.travis/linux-prepare.sh
@@ -16,6 +16,7 @@ cd ..
 
 pip3 install --disable-pip-version-check --user flake8 hacking
 pip3 install --user --upgrade docutils
+pip3 install --user  'meson==0.47.1'
 
 if [ "$M32" ]; then
 # Installing 32-bit libraries.
diff --git a/Documentation/intro/install/afxdp.rst 
b/Documentation/intro/install/afxdp.rst
index 99003e4db..f422685ba 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -387,7 +387,7 @@ PVP using vhostuser device
 --
 First, build OVS with DPDK and AFXDP::
 
-  ./configure  --enable-afxdp --with-dpdk=
+  ./configure  --enable-afxdp --with-dpdk=shared|static|
   make -j4 && make install
 
 Create a vhost-user port from OVS::
diff --git a/Documentation/intro/install/dpdk.rst 
b/Documentation/intro/install/dpdk.rst
index dbf88ec43..46e63ddf9 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -76,9 +76,31 @@ Install DPDK
$ export DPDK_DIR=/usr/src/dpdk-19.11
$ cd $DPDK_DIR
 
+#. Configure and install DPDK using Meson
+
+   Meson is the preferred tool to build recent DPDK releases
+   as Make support is deprecated and will be removed from DPDK-20.11.
+   OVS supports DPDK Meson builds from 19.11 onwards.
+
+   Build and install the DPDK library::
+
+   $ export 

Re: [ovs-dev] [PATCH] dpdk: Refuse running on cpu >= RTE_MAX_LCORE.

2020-07-02 Thread Ilya Maximets
On 6/26/20 2:27 PM, David Marchand wrote:
> DPDK lcores range is [0, RTE_MAX_LCORE[.
> Trying to use a lcore out of this range expose OVS to crashes in DPDK mempool
> local cache array.
> Prefer a "clean" abort so that users know that something is wrong with
> their configuration.
> 
> Signed-off-by: David Marchand 
> ---
>  lib/dpdk.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/lib/dpdk.c b/lib/dpdk.c
> index 98d0e2643e..55ce9a9221 100644
> --- a/lib/dpdk.c
> +++ b/lib/dpdk.c
> @@ -617,7 +617,14 @@ dpdk_set_lcore_id(unsigned cpu)
>  {
>  /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
>  ovs_assert(cpu != NON_PMD_CORE_ID);
> +if (cpu >= RTE_MAX_LCORE) {
> +cpu = LCORE_ID_ANY;
> +}
>  RTE_PER_LCORE(_lcore_id) = cpu;
> +if (rte_lcore_id() == LCORE_ID_ANY) {
> +ovs_abort(0, "PMD thread init failed, trying to use more cores than "
> +  "DPDK supports (RTE_MAX_LCORE %u).", RTE_MAX_LCORE);

I don't think that random OVS abort after pmd-cpu-mask change or port addition
is a good thing to have.  Maybe it's better to limit CPU core discovery?
This way OVS will never try to create thread on unwanted cores.

Since ovs-numa module is mostly used by userspace datapath this will not
affect non-DPDK setups.

Something like this (not tested):

diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 6d0a68522..7c8a3b036 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -27,6 +27,7 @@
 #include 
 #endif /* __linux__ */
 
+#include "dpdk.h"
 #include "hash.h"
 #include "openvswitch/hmap.h"
 #include "openvswitch/list.h"
@@ -58,6 +59,13 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa);
 
 #define MAX_NUMA_NODES 128
 
+#ifdef DPDK_NETDEV
+/* DPDK will not work correctly for cores with core_id >= RTE_MAX_LCORE. */
+#define MAX_CORE_ID(RTE_MAX_LCORE - 1)
+#else
+#define MAX_CORE_IDINT_MAX
+#endif
+
 /* numa node. */
 struct numa_node {
 struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
@@ -112,6 +120,12 @@ insert_new_numa_node(int numa_id)
 static struct cpu_core *
 insert_new_cpu_core(struct numa_node *n, unsigned core_id)
 {
+if (core_id > MAX_CORE_ID) {
+VLOG_DBG("Ignoring CPU core with id %u ( %u > %d).",
+ core_id, core_id, MAX_CORE_ID);
+return NULL;
+}
+
 struct cpu_core *c = xzalloc(sizeof *c);
 
 hmap_insert(_cpu_cores, >hmap_node, hash_int(core_id, 0));
@@ -278,6 +292,11 @@ ovs_numa_init(void)
 if (ovsthread_once_start()) {
 const struct numa_node *n;
 
+if (MAX_CORE_ID != INT_MAX) {
+VLOG_INFO("Maximum allowed CPU core id is %d. "
+  "Other cores will not be available.", MAX_CORE_ID);
+}
+
 if (dummy_numa) {
 discover_numa_and_core_dummy();
 } else {
---

What do you think?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V2] dpif-netdev: Do RCU synchronization at fixed interval in PMD main loop.

2020-07-02 Thread Ilya Maximets
On 11/12/19 7:38 AM, Nitin Katiyar wrote:
> Hi Guys,
> Can you please review the patch? It has been in mailing list for long time.

Hi.

Really sorry for so long delay.  I lost track of this change somehow during
my relocation and job changes.

The patch looks good to me in general.  I'll take another look and
check if it still applicable.  And it might be merged, I think, for
the current release cycle.

Best regards, Ilya Maximets.

> 
> Regards,
> Nitin
> 
>> -Original Message-
>> From: Nitin Katiyar
>> Sent: Friday, October 04, 2019 11:06 AM
>> To: 'ovs-dev@openvswitch.org' 
>> Cc: Anju Thomas ; 'Ilya Maximets'
>> ; Ben Pfaff 
>> Subject: RE: [PATCH V2] dpif-netdev: Do RCU synchronization at fixed interval
>> in PMD main loop.
>>
>> Hi,
>> Can you please review the following patch and provide the feedback?
>>
>> Regards,
>> Nitin
>>
>>> -Original Message-
>>> From: Nitin Katiyar
>>> Sent: Wednesday, August 28, 2019 2:00 PM
>>> To: ovs-dev@openvswitch.org
>>> Cc: Anju Thomas ; Ilya Maximets
>>> 
>>> Subject: RE: [PATCH V2] dpif-netdev: Do RCU synchronization at fixed
>>> interval in PMD main loop.
>>>
>>> Hi,
>>> Please provide your feedback.
>>>
>>> Regards,
>>> Nitin
>>>
 -Original Message-
 From: Nitin Katiyar
 Sent: Thursday, August 22, 2019 10:24 PM
 To: ovs-dev@openvswitch.org
 Cc: Nitin Katiyar ; Anju Thomas
 
 Subject: [PATCH V2] dpif-netdev: Do RCU synchronization at fixed
 interval in PMD main loop.

 Each PMD updates the global sequence number for RCU synchronization
 purpose with other OVS threads. This is done at every 1025th
 iteration in PMD main loop.

 If the PMD thread is responsible for polling large number of queues
 that are carrying traffic, it spends a lot of time processing
 packets and this results in significant delay in performing the
 housekeeping
>>> activities.

 If the OVS main thread is waiting to synchronize with the PMD
 threads and if those threads delay performing housekeeping
 activities for more than 3 sec then LACP processing will be impacted
 and it will lead to LACP flaps. Similarly, other controls protocols
 run by OVS main thread are
>>> impacted.

 For e.g. a PMD thread polling 200 ports/queues with average of 1600
 processing cycles per packet with batch size of 32 may take 1024
 (200 * 1600 * 32) cycles per iteration. In system with 2.0 GHz CPU
 it means more than 5 ms per iteration. So, for 1024 iterations to
 complete it would be more than 5 seconds.

 This gets worse when there are PMD threads which are less loaded.
 It reduces possibility of getting mutex lock in ovsrcu_try_quiesce()
 by heavily loaded PMD and next attempt to quiesce would be after
 1024
>>> iterations.

 With this patch, PMD RCU synchronization will be performed after
 fixed interval instead after a fixed number of iterations. This will
 ensure that even if the packet processing load is high the RCU
 synchronization will not be delayed long.

 Co-authored-by: Anju Thomas 
 Signed-off-by: Anju Thomas 
 Signed-off-by: Nitin Katiyar 
 ---
  lib/dpif-netdev.c | 22 ++
  1 file changed, 22 insertions(+)

 diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
 d0a1c58..63b6cb9
 100644
 --- a/lib/dpif-netdev.c
 +++ b/lib/dpif-netdev.c
 @@ -228,6 +228,9 @@ struct dfc_cache {
   * and used during rxq to pmd assignment. */  #define
 PMD_RXQ_INTERVAL_MAX 6

 +/* Time in microseconds to try RCU quiescing. */ #define
 +PMD_RCU_QUIESCE_INTERVAL 1LL
 +
  struct dpcls {
  struct cmap_node node;  /* Within
>> dp_netdev_pmd_thread.classifiers
 */
  odp_port_t in_port;
 @@ -751,6 +754,9 @@ struct dp_netdev_pmd_thread {

  /* Set to true if the pmd thread needs to be reloaded. */
  bool need_reload;
 +
 +/* Next time when PMD should try RCU quiescing. */
 +long long next_rcu_quiesce;
  };

  /* Interface to netdev-based datapath. */ @@ -5486,6 +5492,8 @@
>>> reload:
  pmd->intrvl_tsc_prev = 0;
  atomic_store_relaxed(>intrvl_cycles, 0);
  cycles_counter_update(s);
 +
 +pmd->next_rcu_quiesce = pmd->ctx.now +
 PMD_RCU_QUIESCE_INTERVAL;
  /* Protect pmd stats from external clearing while polling. */
  ovs_mutex_lock(>perf_stats.stats_mutex);
  for (;;) {
 @@ -5493,6 +5501,17 @@ reload:

  pmd_perf_start_iteration(s);

 +/* Do RCU synchronization at fixed interval instead of doing it
 + * at fixed number of iterations. This ensures that 
 synchronization
 + * would not be delayed long even at high load of packet
 + * processing. */
 +if (pmd->ctx.now > pmd->next_rcu_quiesce) {
 +

Re: [ovs-dev] Vhost-user dequeue zero copy removal

2020-07-02 Thread Ilya Maximets
On 7/2/20 1:56 PM, Kevin Traynor wrote:
> On 02/07/2020 12:18, Stokes, Ian wrote:
>>
>>
>> On 7/1/2020 3:08 PM, Kevin Traynor wrote:
>>> On 01/07/2020 13:46, Ilya Maximets wrote:
 On 7/1/20 1:46 PM, Kevin Traynor wrote:
> On 01/07/2020 11:28, Stokes, Ian wrote:
>> Hi All,
>>
>> While completing validation work for DPDK 18.11.9 and 19.11.3 it was
>> found that zero-copy for vhostuserclient devices is no longer possible.
>> Please see commit below from 18.11.9 (note this patch is also in DPDK
>> 19.11.3)
>>
>
> Thanks catching this in your validation, it almost certainly wouldn't
> have been caught otherwise.

 Definitely a good catch.  Thanks.

>
>>
>> commit 81e025d7ed6a802845909df6fb90505508dc0fbf
>> Author: Xuan Ding 
>> Date:   Wed Apr 29 02:59:46 2020 +
>>
>>   vhost: prevent zero-copy with incompatible client mode
>>
>>   [ upstream commit 715070ea10e6da1169deef2a3ea77ae934b4c333 ]
>>
>>   In server mode, virtio-user inits under the assumption that 
>> vhost-user
>>   supports a list of features. However, this could be problematic 
>> when
>>   in_order feature is negotiated but not supported by vhost-user when
>>   enables dequeue_zero_copy later.
>>
>>   Add handling when vhost-user enables dequeue_zero_copy as client.

 IIUC, this patch basically drops the feature for perfectly fine cases
 with VMs.  While it was intended to forbid running zero-copy with 
 virtio-user
 it breaks a different usecase blocking the feature entirely.

 Isn't it an API breakage?  IMHO, it should not have been backported in the
 first place, since dropping the feature is not what usually expected in
 stable releases.  And this must be in release notes anyway.

 I think, the right solution here should be to make a patch to handle 
 specific
 virtio-user case and stop blocking valid cases and release new DPDK stable
 versions for already released ones.

 If it's too hard to make a patch or no-one wants to work on this, just 
 revert
 these changes from stable branches and release a new stable DPDK version
 for both 18.11 and 19.11.  But anyway, regression should be addressed in 
 DPDK
 before 20.11 or it will block OVS upgrade to that version.

>>>
>>> It is not in a released 18.11. It was caught by Ian's team as part of
>>> 18.11.9-rc testing.
>>>
>>
>> OK so it seems like we can use 18.11.9 for the 2.11 and 2.13 branches as 
>> it will have this patch reverted.
>>
>> Is there an 18.11.9 RC3 planned? We can test it if it's of use.
>>
> 
> I've just pushed it to the 18:11 branch, can you test with that?
> 
> I wasn't planning on an rc3 at this stage for just this patch, but if
> you need a tarball for testing automation or something, let me know and
> I can create it.
> 

>>
>> OVS only supports zero-copy to date as an experimental feature with
>> dpdkvhostuserclient port types.
>>
>> We were aiming to update the validated DPDK versions as follows and
>> recommend them as minimum versions due to the inclusion of CSE fixes.
>>
>> OVS 2.11 -> 18.11.6 -> 18.11.9
>> OVS 2.12 -> 18.11.6 -> 18.11.9
>> OVS 2.13 -> 19.11.0 -> 19.11.3
>> OVS Master -> 19.11.0 -> 19.11.3
>>
>> However recommending these DPDK version will trigger the dpdk zero copy
>> functionality break in OVS.
>>
>
> 18.11.9 is not released yet, so at least for it, I think we could
> replace that patch(es) with a warning.
>
> That is not removing any functionality or causing a regression for users
> of earlier 18.11.x or OVS 2.11/2.12, but it is letting them know there
> may be an issue.

 That might be an option.  But we likely need same change on 19.11 that
 will require one more stable release on it.

>>>
>>> Sent patches to remove restriction and add warning for 18.11 and 19.11
>>> branches here:
>>> http://inbox.dpdk.org/stable/20200701135250.20262-1-ktray...@redhat.com/
>>> http://inbox.dpdk.org/stable/20200701135305.20343-1-ktray...@redhat.com/
>>>
>>> If there is a better fix in the future, I can apply it in a future release.
>>>
>
>  if (vsocket->dequeue_zero_copy) {
>  if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
> -   RTE_LOG(ERR, VHOST_CONFIG,
> -   "error: zero copy is incompatible with vhost
> client mode\n");
> -   ret = -1;
> -   goto out_mutex;
> +   RTE_LOG(WARNING, VHOST_CONFIG,
> +   "zero copy may be incompatible with vhost client
> mode\n");
>  }
>
>> What are peoples thoughts here on how to proceed?
>> Are people aware if the feature is used in deployments to date? If not,

Re: [ovs-dev] Vhost-user dequeue zero copy removal

2020-07-02 Thread Kevin Traynor
On 02/07/2020 12:18, Stokes, Ian wrote:
> 
> 
> On 7/1/2020 3:08 PM, Kevin Traynor wrote:
>> On 01/07/2020 13:46, Ilya Maximets wrote:
>>> On 7/1/20 1:46 PM, Kevin Traynor wrote:
 On 01/07/2020 11:28, Stokes, Ian wrote:
> Hi All,
>
> While completing validation work for DPDK 18.11.9 and 19.11.3 it was
> found that zero-copy for vhostuserclient devices is no longer possible.
> Please see commit below from 18.11.9 (note this patch is also in DPDK
> 19.11.3)
>

 Thanks catching this in your validation, it almost certainly wouldn't
 have been caught otherwise.
>>>
>>> Definitely a good catch.  Thanks.
>>>

>
> commit 81e025d7ed6a802845909df6fb90505508dc0fbf
> Author: Xuan Ding 
> Date:   Wed Apr 29 02:59:46 2020 +
>
>   vhost: prevent zero-copy with incompatible client mode
>
>   [ upstream commit 715070ea10e6da1169deef2a3ea77ae934b4c333 ]
>
>   In server mode, virtio-user inits under the assumption that 
> vhost-user
>   supports a list of features. However, this could be problematic when
>   in_order feature is negotiated but not supported by vhost-user when
>   enables dequeue_zero_copy later.
>
>   Add handling when vhost-user enables dequeue_zero_copy as client.
>>>
>>> IIUC, this patch basically drops the feature for perfectly fine cases
>>> with VMs.  While it was intended to forbid running zero-copy with 
>>> virtio-user
>>> it breaks a different usecase blocking the feature entirely.
>>>
>>> Isn't it an API breakage?  IMHO, it should not have been backported in the
>>> first place, since dropping the feature is not what usually expected in
>>> stable releases.  And this must be in release notes anyway.
>>>
>>> I think, the right solution here should be to make a patch to handle 
>>> specific
>>> virtio-user case and stop blocking valid cases and release new DPDK stable
>>> versions for already released ones.
>>>
>>> If it's too hard to make a patch or no-one wants to work on this, just 
>>> revert
>>> these changes from stable branches and release a new stable DPDK version
>>> for both 18.11 and 19.11.  But anyway, regression should be addressed in 
>>> DPDK
>>> before 20.11 or it will block OVS upgrade to that version.
>>>
>>
>> It is not in a released 18.11. It was caught by Ian's team as part of
>> 18.11.9-rc testing.
>>
> 
> OK so it seems like we can use 18.11.9 for the 2.11 and 2.13 branches as 
> it will have this patch reverted.
> 
> Is there an 18.11.9 RC3 planned? We can test it if it's of use.
> 

I've just pushed it to the 18:11 branch, can you test with that?

I wasn't planning on an rc3 at this stage for just this patch, but if
you need a tarball for testing automation or something, let me know and
I can create it.

>>>
>
> OVS only supports zero-copy to date as an experimental feature with
> dpdkvhostuserclient port types.
>
> We were aiming to update the validated DPDK versions as follows and
> recommend them as minimum versions due to the inclusion of CSE fixes.
>
> OVS 2.11 -> 18.11.6 -> 18.11.9
> OVS 2.12 -> 18.11.6 -> 18.11.9
> OVS 2.13 -> 19.11.0 -> 19.11.3
> OVS Master -> 19.11.0 -> 19.11.3
>
> However recommending these DPDK version will trigger the dpdk zero copy
> functionality break in OVS.
>

 18.11.9 is not released yet, so at least for it, I think we could
 replace that patch(es) with a warning.

 That is not removing any functionality or causing a regression for users
 of earlier 18.11.x or OVS 2.11/2.12, but it is letting them know there
 may be an issue.
>>>
>>> That might be an option.  But we likely need same change on 19.11 that
>>> will require one more stable release on it.
>>>
>>
>> Sent patches to remove restriction and add warning for 18.11 and 19.11
>> branches here:
>> http://inbox.dpdk.org/stable/20200701135250.20262-1-ktray...@redhat.com/
>> http://inbox.dpdk.org/stable/20200701135305.20343-1-ktray...@redhat.com/
>>
>> If there is a better fix in the future, I can apply it in a future release.
>>

  if (vsocket->dequeue_zero_copy) {
  if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
 -   RTE_LOG(ERR, VHOST_CONFIG,
 -   "error: zero copy is incompatible with vhost
 client mode\n");
 -   ret = -1;
 -   goto out_mutex;
 +   RTE_LOG(WARNING, VHOST_CONFIG,
 +   "zero copy may be incompatible with vhost client
 mode\n");
  }

> What are peoples thoughts here on how to proceed?
> Are people aware if the feature is used in deployments to date? If not,
> as it's experimental is it something that should be removed?
>>>
>>> I don't think that we should remove the feature from OVS since it, IIUC,
>>> could be enabled back in 

Re: [ovs-dev] [PATCH] netdev-offload-tc: Add drop action support.

2020-07-02 Thread Tonghao Zhang
On Wed, Jul 1, 2020 at 12:30 AM William Tu  wrote:
>
> On Tue, Jun 30, 2020 at 9:13 AM Simon Horman  
> wrote:
> >
> > On Tue, Jun 30, 2020 at 09:06:40AM -0700, William Tu wrote:
> > > On Tue, Jun 30, 2020 at 9:01 AM Simon Horman  
> > > wrote:
> > > >
> > > > On Tue, Jun 30, 2020 at 08:30:45AM -0700, William Tu wrote:
> > > > > Currently drop action is not offloaded when using userspace datapath
> > > > > with tc offload.  The patch programs tc gact (generic action) chain
> > > > > ID 0 to drop the packet by setting it to TC_ACT_SHOT.
> > > > >
> > > > > Example:
> > > > > $ ovs-appctl dpctl/add-flow netdev@ovs-netdev \
> > > > >   'recirc_id(0),in_port(2),eth(),eth_type(0x0806),\
> > > > >   arp(op=2,tha=00:50:56:e1:4b:ab,tip=10.255.1.116)' drop
> > > > >
> > > > > $ tc filter show dev ovs-p0 ingress
> > > > > filter protocol arp pref 2 flower chain 0
> > > > > filter protocol arp pref 2 flower chain 0 handle 0x1
> > > > >   eth_type arp
> > > > >   arp_tip 10.255.1.116
> > > > >   arp_op reply
> > > > >   arp_tha 00:50:56:e1:4b:ab
> > > > >   skip_hw
> > > > >   not_in_hw
> > > > >   action order 1: gact action drop
> > > > > ...
> > > > >
> > > > > Signed-off-by: William Tu 
> > > >
> > > > Hi William,
> > > >
> > > > this change looks good to me other than that I'm unsure
> > > > why we need #ifndef __KERNEL__.
> > > >
> > > Because for kernel datapath, we don't define
> > > OVS_ACTION_ATTR_DROP
> > >
> > > at datapath/linux/compat/include/linux/openvswitch.h
> > > ...
> > > #ifndef __KERNEL__
> > > OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
> > > OVS_ACTION_ATTR_TUNNEL_POP,/* u32 port number. */
> > > OVS_ACTION_ATTR_DROP,  /* u32 xlate_error. */
> > > OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */
> > > #endif
> >
> > I feel that I'm missing something terribly obvious, but it seems to me that
> > lib/netdev-offload-tc.c is only ever compiled as user-space code (its not
> > compiled into the kernel datapath) and thus __KERNEL__ would never be set.
>
> I see, so probably checking __KERNEL__ here is not necessary. Will remove it.
Hi William
It looks good to me.
BTW, If there is no action, we use the drop action as default.
$ ovs-appctl dpctl/add-flow
'recirc_id(0),in_port(3),eth(),eth_type(0x0806),arp(op=2,tha=00:50:56:e1:4b:ab,tip=10.255.1.116)'
""
> William




--
Best regards, Tonghao
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Vhost-user dequeue zero copy removal

2020-07-02 Thread Stokes, Ian




On 7/1/2020 3:08 PM, Kevin Traynor wrote:

On 01/07/2020 13:46, Ilya Maximets wrote:

On 7/1/20 1:46 PM, Kevin Traynor wrote:

On 01/07/2020 11:28, Stokes, Ian wrote:

Hi All,

While completing validation work for DPDK 18.11.9 and 19.11.3 it was
found that zero-copy for vhostuserclient devices is no longer possible.
Please see commit below from 18.11.9 (note this patch is also in DPDK
19.11.3)



Thanks catching this in your validation, it almost certainly wouldn't
have been caught otherwise.


Definitely a good catch.  Thanks.





commit 81e025d7ed6a802845909df6fb90505508dc0fbf
Author: Xuan Ding 
Date:   Wed Apr 29 02:59:46 2020 +

  vhost: prevent zero-copy with incompatible client mode

  [ upstream commit 715070ea10e6da1169deef2a3ea77ae934b4c333 ]

  In server mode, virtio-user inits under the assumption that vhost-user
  supports a list of features. However, this could be problematic when
  in_order feature is negotiated but not supported by vhost-user when
  enables dequeue_zero_copy later.

  Add handling when vhost-user enables dequeue_zero_copy as client.


IIUC, this patch basically drops the feature for perfectly fine cases
with VMs.  While it was intended to forbid running zero-copy with virtio-user
it breaks a different usecase blocking the feature entirely.

Isn't it an API breakage?  IMHO, it should not have been backported in the
first place, since dropping the feature is not what usually expected in
stable releases.  And this must be in release notes anyway.

I think, the right solution here should be to make a patch to handle specific
virtio-user case and stop blocking valid cases and release new DPDK stable
versions for already released ones.

If it's too hard to make a patch or no-one wants to work on this, just revert
these changes from stable branches and release a new stable DPDK version
for both 18.11 and 19.11.  But anyway, regression should be addressed in DPDK
before 20.11 or it will block OVS upgrade to that version.



It is not in a released 18.11. It was caught by Ian's team as part of
18.11.9-rc testing.



OK so it seems like we can use 18.11.9 for the 2.11 and 2.13 branches as 
it will have this patch reverted.


Is there an 18.11.9 RC3 planned? We can test it if it's of use.





OVS only supports zero-copy to date as an experimental feature with
dpdkvhostuserclient port types.

We were aiming to update the validated DPDK versions as follows and
recommend them as minimum versions due to the inclusion of CSE fixes.

OVS 2.11 -> 18.11.6 -> 18.11.9
OVS 2.12 -> 18.11.6 -> 18.11.9
OVS 2.13 -> 19.11.0 -> 19.11.3
OVS Master -> 19.11.0 -> 19.11.3

However recommending these DPDK version will trigger the dpdk zero copy
functionality break in OVS.



18.11.9 is not released yet, so at least for it, I think we could
replace that patch(es) with a warning.

That is not removing any functionality or causing a regression for users
of earlier 18.11.x or OVS 2.11/2.12, but it is letting them know there
may be an issue.


That might be an option.  But we likely need same change on 19.11 that
will require one more stable release on it.



Sent patches to remove restriction and add warning for 18.11 and 19.11
branches here:
http://inbox.dpdk.org/stable/20200701135250.20262-1-ktray...@redhat.com/
http://inbox.dpdk.org/stable/20200701135305.20343-1-ktray...@redhat.com/

If there is a better fix in the future, I can apply it in a future release.



 if (vsocket->dequeue_zero_copy) {
 if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
-   RTE_LOG(ERR, VHOST_CONFIG,
-   "error: zero copy is incompatible with vhost
client mode\n");
-   ret = -1;
-   goto out_mutex;
+   RTE_LOG(WARNING, VHOST_CONFIG,
+   "zero copy may be incompatible with vhost client
mode\n");
 }


What are peoples thoughts here on how to proceed?
Are people aware if the feature is used in deployments to date? If not,
as it's experimental is it something that should be removed?


I don't think that we should remove the feature from OVS since it, IIUC,
could be enabled back in DPDK.


Agreed with above, if it will be fixed in DPDK for 18.11 and a future 
19.11 then we can use them.


In this case I will propose that we should move to recommend 19.11.2 
instead of 19.11.3 at the moment for OVS 2.13 and OVS master. 19.11.2 
has the vhost CVE security fixes and has been validated on our side 
already. Once 19.11.4 is released we could then move to that if the zero 
copy patch was resolved?


Would this be acceptable? Or are there any critical patches in 19.11.3 
that we would be missing?


BR
Ian


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Urgent From Hospital

2020-07-02 Thread Marilis Mannik



Hello My Beloved
this is Ms Marilis Mannik from Estonia writing from the hospital here in Ivory 
Coast;Dear I want you to know that I'm dying here in this hospital right now 
which i don't know if i will see some few days to come.

My Beloved, i was informed by my doctor that i got poisoned and it affected my 
liver and i can only live for some days. The reason why i contacted you today 
is because i know that my step mother wanted to kill me and take my inheritance 
from my late Father. I have a little adopted child named Andrew C. Mannik that 
i adopted in this Country when my late Father was alive and $3.5 million 
Dollars i inherited from my late father. My step mother and her children they 
are after Andrew right now because they found out that Andrew was aware of the 
poison, and because i handed the documents of the fund over to him the day my 
step Mother poisoned my food, for that reason they do not want Andrew to expose 
them, so they are doing everything possible to kill him.

My Beloved, please i want you to help him out of this country with the money, 
he is the only one taking good care of me here in this hospital right now and 
even this email you are reading now he is the one helping me out. I want you to 
get back to me so that he will give you the documents of the fund and he will 
direct you to a well known lawyer that i have appointed, the lawyer will assist 
you to change the documents of the fund to your name to enable the bank 
transfer the money to you..

This is the favor i need when you have gotten the fund:

(1) Keep 30% of the money for Andrew until he finish his studies to become a 
man as he has been there for me as my lovely Son and i promised to support him 
in life to become a medical Doctor because he always desire for it with the 
scholarship he had won so far. I want you to take him along with you to your 
country and establish him as your son.

(2) Give 20% of the money to handicap people and charity organization. The 
remaining 50% should be yours for your help to Andrew.

Note; This should be a code between you and my son Andrew in this transaction 
"Hospital" any mail from him, the Lawyer he will direct you to, without this 
code "Hospital" is not from the Andrew, the Lawyer or myself as i don't know 
what will happen to me in the next few hours.

Finally, write me back so that Andrew will send you his pictures to be sure of 
whom you are dealing with. Andrew is 14years now, therefore guide him. And if i 
don't hear from you i will look for another person or any organization.

May Almighty God bless you and use you to accomplish my wish. Pray for me 
always.
Ms Marilis Mannik
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpdk: Add commands to configure log levels.

2020-07-02 Thread Ilya Maximets
On 7/2/20 10:25 AM, David Marchand wrote:
> On Thu, Jul 2, 2020 at 12:05 AM Ilya Maximets  wrote:
>> On 6/26/20 2:27 PM, David Marchand wrote:
>>> Enabling debug logs in dpdk can be a challenge to be sure of what is
>>> actually enabled, add commands to list and change those log levels.
>>
>> Could you, please, provide some usage examples?
>> Maybe add some documentation about these commands.  And a NEWS update
>> since this is a user visible change.
> 
> In the documentation, example outputs could get outdated since we
> directly pass dpdk output (/me still wants to shoot the dpdk global
> log level that is just useless).
> But I think it is still worth adding.
> 
> I would see either in Documentation/howto/dpdk.rst or
> Documentation/intro/install/dpdk.rst.
> The latter seems the best fit: we have setup instructions, configuring
> logs is close.
> WDYT?

install/dpdk.rst might be more appropriate.
However, maybe it's enough to just create lib/dpdk-unixctl.man similar
to lib/netdev-dpdk-unixctl.man ?  We need it anyway to keep man pages
updated.

> 
> [snip]
> 
>>> @@ -261,6 +262,102 @@ static cookie_io_functions_t dpdk_log_func = {
>>>  .write = dpdk_log_write,
>>>  };
>>>
>>> +static void
>>> +dpdk_unixctl_log_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
>>> +  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
>>> +{
>>> +char *response = NULL;
>>> +FILE *stream;
>>> +size_t size;
>>> +
>>> +stream = open_memstream(, );
>>> +if (!stream) {
>>> +response = xasprintf("Unable to open memstream: %s.",
>>> + ovs_strerror(errno));
>>> +unixctl_command_reply_error(conn, response);
>>> +goto out;
>>> +}
>>> +
>>> +rte_log_dump(stream);
>>
>> This function is fine, however, I see that you're adding almost same function
>> do dump lcores in a later patch.  Maybe we could rename this one to
>> 'dpdk_unixctl_memstream' and pass 'rte_log_dump' function pointer via 'aux'
>> argument.  This will allow us to easily add this kind of unixctl calls.
> 
> dpdk "dump" apis tend to have the same prototype, so yes it will save some 
> code.
> 
> [snip]
> 
>>> +
>>> +unixctl_command_reply_error(conn, msg);
>>> +free(s);
>>> +free(msg);
>>> +return;
>>> +}
>>> +
>>> +if (rte_log_set_level_pattern(pattern, level) < 0) {
>>> +char *msg = xasprintf("cannot set log level for %s", argv[i]);
>>> +
>>> +unixctl_command_reply_error(conn, msg);
>>> +free(s);
>>> +free(msg);
>>> +return;
>>
>> This 4 lines repeated twice.  Maybe combine them somehow?
>> Something like this:
>>
>> char *err_msg = NULL;
>> ...
>> if (level == -1) {
>> err_msg = xasprintf("invalid log level: \'%s\'", level_string);
> 
> No need for escaping?

Yes.  You're right.  I'm not sure why we (me mostly) doing that sometimes
in code... Some weird habits from shell scripting?  I don't know. :)

> 
> 
> Thanks for the review.
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP (iperf3)

2020-07-02 Thread Yanqin Wei
Hi Shahaji,

IIUC, 1Hz time tick cannot be disabled even if full dynticks, right? But I have 
no idea of why it caused packet loss because it should be only a small overhead 
when rcu_nocbs is enabled .

Best Regards,
Wei Yanqin

===

From: Shahaji Bhosle  
Sent: Thursday, July 2, 2020 6:11 AM
To: Yanqin Wei 
Cc: Flavio Leitner ; ovs-dev@openvswitch.org; nd 
; Ilya Maximets ; Lee Reed 
; Vinay Gupta ; Alex Barba 

Subject: Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP (iperf3)

Hi Yanqin, 
I added the patch you gave me to my script which runs a do nothing for loop. 
You can see the spikes in the below plot. 976/1000 times we are perfect, but 
around every 1 second u can see something going wrong. I dont see anything 
wrong in the trace-cmd world.
Thanks, Shahaji

root@bcm958802a8046c:~/vinay_rx/dynticks-testing# ./run_isb_rdtsc 
+ TARGET=2
+ MASK=4
+ NUM_ITER=1000
+ NUM_MS=100
+ N=3750
+ LOGFILE=loop_1000iter_100ms.log
+ tee loop_1000iter_100ms.log
+ trace-cmd record -p function_graph -e all -M 4 -o trace_1000iter_100ms.dat 
taskset -c 2 /home/root/arm_stb_user_loop_isb_rdtsc 1000 3750
  plugin 'function_graph'
Cycles/Second (Hz) = 30
Nano-seconds per cycle = 0.

Using ISB() before rte_rdtsc()
num_iter: 1000
do_nothing_loop for (N)=3750 
Running 1000 iterations of do_nothing_loop for (N)=3750

Average =          100282.193430333 u-secs
Max =          124777.48867 u-secs
Min =          10.01767 u-secs
\u03c3  =            1931.352376508 u-secs

Average =              300846580.29 cycles
Max =              374332466.00 cycles
Min =              30053.00 cycles
\u03c3  =                5794057.13 cycles

#\u03c3 = events
 0 = 976
 1 = 3
 2 = 4
 3 = 3
 4 = 3
 5 = 2
 6 = 2
 7 = 2
 8 = 1
 9 = 1
10 = 1
12 = 2




On Wed, Jul 1, 2020 at 3:57 AM Yanqin Wei  wrote:
Hi Shahaji,

Adding isb instruction can help rdtsc precise, which sync system counter to 
cntvct_el0. There is a patch in DPDK. https://patchwork.dpdk.org/patch/66561/
So it may be not related with intermittent drops you observed.

Best Regards,
Wei Yanqin

> -Original Message-
> From: dev  On Behalf Of Shahaji Bhosle
> via dev
> Sent: Wednesday, July 1, 2020 6:05 AM
> To: Flavio Leitner 
> Cc: mailto:ovs-dev@openvswitch.org; Ilya Maximets 
> ;
> Lee Reed ; Vinay Gupta
> ; Alex Barba 
> Subject: Re: [ovs-dev] 10-25 packet drops every few (10-50) seconds TCP 
> (iperf3)
>
> Hi Flavio,
> I still see intermittent drops with rcu_nocbs. So I wrote that do_nothing()
> loop..to avoid all the other distractions to see if Linux is messing with the 
> OVS
> loop just to see what is going on. The interesting thing I see the case *BOLD*
> below where I use an ISB() instruction my STD deviation is well within Both 
> the
> results are basically DO NOTHING FOR 100msec and see what happens to
> time :) Thanks, Shahaji
>
> static inline uint64_t
> *rte_get_tsc_cycles*(void)
> {
> uint64_t tsc;
> #ifdef USE_ISB
> asm volatile("*isb*; mrs %0, pmccntr_el0" : "=r"(tsc)); #else asm
> volatile("mrs %0, pmccntr_el0" : "=r"(tsc)); #endif return tsc; } #endif
> /*RTE_ARM_EAL_RDTSC_USE_PMU*/
>
> ==
> usleep(100);
> for (volatile int i=0; i rte_get_tsc_cycles();
> /* do nothig for 1us second */
> *#ifdef USE_ISB*
> for(volatile int j=0; j < num_us; j++);       * THIS IS MESSED
> UP, 100msec do nothing, I am getting 2033 usec STD DEVIATION* #else
> *for(volatile int j=0; j < num_us; j++);        THIS LOOP HAS
> VERY LOW STD DEVIATION*
> * rte_isb();*
> #endif
> volatile uint64_t tsc_end = rte_get_tsc_cycles(); cycles[i] = tsc_end - 
> tsc_start; }
> usleep(100); calc_avg_var_stddev(num_iter, [0]);
> ===
> *#ifdef USE_ISB*
> root@bcm958802a8046c:~/vinay_rx/dynticks-testing# ./run_isb_rdtsc
> + TARGET=2
> + MASK=4
> + NUM_ITER=1000
> + NUM_MS=100
> + N=3750
> + LOGFILE=loop_1000iter_100ms.log
> + tee loop_1000iter_100ms.log
> + trace-cmd record -p function_graph -e all -M 4 -o
> trace_1000iter_100ms.dat taskset -c 2
> /home/root/arm_stb_user_loop_isb_rdtsc 1000 3750
>   plugin 'function_graph'
> Cycles/Second (Hz) = 30
> Nano-seconds per cycle = 0.
>
> Using ISB() before rte_rdtsc()
> num_iter: 1000
> do_nothing_loop for (N)=3750
> Running 1000 iterations of do_nothing_loop for (N)=3750
>
> Average =          100328.158561667 u-secs
> Max =          123024.79533 u-secs
> Min =          10.01767 u-secs
> *\sigma  =            2033.118969489 u-secs*
>
> Average =              300984475.69 cycles
> Max =              369074386.00 cycles
> Min =              30053.00 cycles
> \sigma  =                6099356.91 cycles
>
> #\sigma = events
>  0 = 968
>  

Re: [ovs-dev] [PATCH] dpdk: Add commands to configure log levels.

2020-07-02 Thread David Marchand
On Thu, Jul 2, 2020 at 12:05 AM Ilya Maximets  wrote:
> On 6/26/20 2:27 PM, David Marchand wrote:
> > Enabling debug logs in dpdk can be a challenge to be sure of what is
> > actually enabled, add commands to list and change those log levels.
>
> Could you, please, provide some usage examples?
> Maybe add some documentation about these commands.  And a NEWS update
> since this is a user visible change.

In the documentation, example outputs could get outdated since we
directly pass dpdk output (/me still wants to shoot the dpdk global
log level that is just useless).
But I think it is still worth adding.

I would see either in Documentation/howto/dpdk.rst or
Documentation/intro/install/dpdk.rst.
The latter seems the best fit: we have setup instructions, configuring
logs is close.
WDYT?

[snip]

> > @@ -261,6 +262,102 @@ static cookie_io_functions_t dpdk_log_func = {
> >  .write = dpdk_log_write,
> >  };
> >
> > +static void
> > +dpdk_unixctl_log_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
> > +  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
> > +{
> > +char *response = NULL;
> > +FILE *stream;
> > +size_t size;
> > +
> > +stream = open_memstream(, );
> > +if (!stream) {
> > +response = xasprintf("Unable to open memstream: %s.",
> > + ovs_strerror(errno));
> > +unixctl_command_reply_error(conn, response);
> > +goto out;
> > +}
> > +
> > +rte_log_dump(stream);
>
> This function is fine, however, I see that you're adding almost same function
> do dump lcores in a later patch.  Maybe we could rename this one to
> 'dpdk_unixctl_memstream' and pass 'rte_log_dump' function pointer via 'aux'
> argument.  This will allow us to easily add this kind of unixctl calls.

dpdk "dump" apis tend to have the same prototype, so yes it will save some code.

[snip]

> > +
> > +unixctl_command_reply_error(conn, msg);
> > +free(s);
> > +free(msg);
> > +return;
> > +}
> > +
> > +if (rte_log_set_level_pattern(pattern, level) < 0) {
> > +char *msg = xasprintf("cannot set log level for %s", argv[i]);
> > +
> > +unixctl_command_reply_error(conn, msg);
> > +free(s);
> > +free(msg);
> > +return;
>
> This 4 lines repeated twice.  Maybe combine them somehow?
> Something like this:
>
> char *err_msg = NULL;
> ...
> if (level == -1) {
> err_msg = xasprintf("invalid log level: \'%s\'", level_string);

No need for escaping?


Thanks for the review.

-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 ovn 1/5] ovn-northd: Document OVS register usage in logical flows.

2020-07-02 Thread Dumitru Ceara
On 7/1/20 8:57 PM, Han Zhou wrote:
> 
> 
> On Tue, Jun 30, 2020 at 2:41 PM Dumitru Ceara  > wrote:
>>
>> Also, use macros instead of bare references to register names.
>>
>> Signed-off-by: Dumitru Ceara  >
>> ---
>>  northd/ovn-northd.c |  166
> ---
>>  1 file changed, 118 insertions(+), 48 deletions(-)
>>
>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>> index ae1b85a..1a47f5f 100644
>> --- a/northd/ovn-northd.c
>> +++ b/northd/ovn-northd.c
>> @@ -227,8 +227,63 @@ enum ovn_stage {
>>  #define REG_ECMP_GROUP_ID       "reg8[0..15]"
>>  #define REG_ECMP_MEMBER_ID      "reg8[16..31]"
>>
>> +/* Registers used for routing. */
>> +#define REG_NEXT_HOP_IPV4 "reg0"
>> +#define REG_NEXT_HOP_IPV6 "xxreg0"
>> +#define REG_SRC_IPV4 "reg1"
>> +#define REG_SRC_IPV6 "xxreg1"
>> +
>>  #define FLAGBIT_NOT_VXLAN "flags[1] == 0"
>>
>> +/*
>> + * OVS register usage:
>> + *
>> + * Logical Switch pipeline:
>> + * +--+-+
>> + * | R0       | REGBIT_{CONNTRACK/DHCP/DNS/HAIRPIN} |
>> + * +--+-+
>> + * | R1 - R15 |              UNUSED                 |
>> + * +--+-+
>> + *
>> + * Logical Router pipeline:
>> + * +-+--+---+-+
>> + * | R0  | REGBIT_ND_RA_OPTS_RESULT |   |             |
>> + * |     |    IPv4-NEXT-HOP         |   |             |
>> + * +-+--+   |             |
>> + * | R1  | IPv4-SRC-IP for ARP-REQ  |   |             |
>> + * +-+--+   |             |
>> + * | R2  |        UNUSED            | X |             |
>> + * +-+--+ X |             |
>> + * | R3  |        UNUSED            | R |    IPv6     |
>> + * +-+--+ E |  NEXT-HOP   |
>> + * | R4  |        UNUSED            | G |             |
>> + * +-+--+ 0 |             |
>> + * | R5  |        UNUSED            |   |             |
>> + * +-+--+   |             |
>> + * | R6  |        UNUSED            |   |             |
>> + * +-+--+   |             |
>> + * | R7  |        UNUSED            |   |             |
>> + * +-+--+---+-+
>> + * | R8  |     ECMP_GROUP_ID        |   |             |
>> + * |     |     ECMP_MEMBER_ID       |   |             |
>> + * +-+--+   |             |
>> + * | R9  |        UNUSED            |   |             |
>> + * +-+--+   |             |
>> + * | R10 |        UNUSED            | X |             |
>> + * +-+--+ X |             |
>> + * | R11 |        UNUSED            | R | IPv6-SRC-IP |
>> + * +-+--+ E |   for NS    |
>> + * | R12 |        UNUSED            | G |             |
>> + * +-+--+ 1 |             |
>> + * | R13 |        UNUSED            |   |             |
>> + * +-+--+   |             |
>> + * | R14 |        UNUSED            |   |             |
>> + * +-+--+   |             |
>> + * | R15 |        UNUSED            |   |             |
>> + * +-+--+---+-+
>> + *
>> + */
> 
> Thanks for clearly documenting register usage in the table format.
> However, I think here is a mistake. The xxreg registers are 128-bit
> each, and should overlay only 4 regular registers. E.g. xxreg0 should
> span R0 - R3, xxreg1 should span R4 - R7.

Oops, my bad.. I had started with documenting all registers but then
decided that I shouldn't include R10-R15 (logical fields) but I messed
up the diagram. Thanks for spotting this, I'll fix it in v3. Same
applies for patch 2/5 of the series.

> 
> With this addressed:
> Acked-by: Han Zhou mailto:hz...@ovn.org>>

Thanks,
Dumitru

>> +
>>  /* Returns an "enum ovn_stage" built from the arguments. */
>>  static enum ovn_stage
>>  ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
> pipeline,
>> @@ -7127,15 +7182,15 @@ build_routing_policy_flow(struct hmap *lflows,
> struct ovn_datapath *od,
>>              ds_put_format(, "pkt.mark = %u; ", pkt_mark);
>>          }
>>          bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false;
>> -        ds_put_format(, "%sreg0 = %s; "
>> -                      "%sreg1 = %s; "
>> +        ds_put_format(, "%s = %s; "
>> +                      "%s = %s; "
>>                        "eth.src = %s; "
>>                        "outport = %s; "
>>                        "flags.loopback = 1; "
>>                        "next;",
>> -                      is_ipv4 ? "" : "xx",
>> +                      is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6,
>>                        rule->nexthop,
>> -                      is_ipv4